From 24e89c97cbbdd89348b1f02497a129ac8ac0a14f Mon Sep 17 00:00:00 2001 From: isabelle-dr Date: Tue, 22 Mar 2022 19:33:40 -0400 Subject: re arrange repo --- .idea/gtfs-book/ch-00-definitive-guide-to-gtfs.md | 55 --- .idea/gtfs-book/ch-01-introduction.md | 45 -- .idea/gtfs-book/ch-02-agencies.md | 47 -- .idea/gtfs-book/ch-03-stops.md | 117 ----- .idea/gtfs-book/ch-04-routes.md | 82 ---- .idea/gtfs-book/ch-05-trips.md | 222 --------- .idea/gtfs-book/ch-06-stop-times.md | 160 ------- .idea/gtfs-book/ch-07-calendar.md | 138 ------ .idea/gtfs-book/ch-08-fares.md | 115 ----- .idea/gtfs-book/ch-09-shapes.md | 76 --- .idea/gtfs-book/ch-10-frequencies.md | 98 ---- .idea/gtfs-book/ch-11-stop-transfers.md | 65 --- .idea/gtfs-book/ch-12-feed-information.md | 36 -- .idea/gtfs-book/ch-13-importing-to-sql.md | 125 ----- .idea/gtfs-book/ch-14-switching-to-integer-ids.md | 111 ----- .idea/gtfs-book/ch-15-optimizing-shapes.md | 158 ------- .idea/gtfs-book/ch-16-deleting-unused-data.md | 106 ----- .idea/gtfs-book/ch-17-searching-for-trips.md | 346 -------------- .idea/gtfs-book/ch-18-working-with-trip-blocks.md | 109 ----- .idea/gtfs-book/ch-19-calculating-fares.md | 296 ------------ .idea/gtfs-book/ch-20-trip-patterns.md | 141 ------ .idea/gtfs-book/ch-21-conclusion.md | 23 - .idea/gtfs-book/images/agency-google-maps.png | Bin 348035 -> 0 bytes .idea/gtfs-book/images/block-01.png | Bin 20153 -> 0 bytes .idea/gtfs-book/images/block-02.png | Bin 19817 -> 0 bytes .idea/gtfs-book/images/block-03.png | Bin 19154 -> 0 bytes .idea/gtfs-book/images/block-04.png | Bin 20673 -> 0 bytes .idea/gtfs-book/images/orthogonal-distance.jpeg | Bin 23287 -> 0 bytes .idea/gtfs-book/images/shape-original.jpeg | Bin 402743 -> 0 bytes .idea/gtfs-book/images/shape-reduced.jpeg | Bin 401253 -> 0 bytes .idea/gtfs-book/images/stop-times.png | Bin 454610 -> 0 bytes .idea/gtfs-book/images/stops-sample.png | Bin 129647 -> 0 bytes .../ch-00-definitive-guide-to-gtfs-realtime.md | 58 --- 
.idea/gtfs-realtime-book/ch-01-introduction.md | 81 ---- .idea/gtfs-realtime-book/ch-02-service-alerts.md | 363 --------------- .../gtfs-realtime-book/ch-03-vehicle-positions.md | 317 ------------- .idea/gtfs-realtime-book/ch-04-trip-updates.md | 217 --------- .idea/gtfs-realtime-book/ch-05-protocol-buffers.md | 379 --------------- .../ch-06-consuming-service-alerts.md | 305 ------------ .../ch-07-consuming-vehicle-positions.md | 509 --------------------- .../ch-08-consuming-trip-updates.md | 380 --------------- .idea/gtfs-realtime-book/ch-09-database-storage.md | 492 -------------------- .idea/gtfs-realtime-book/ch-10-extensions.md | 374 --------------- .idea/gtfs-realtime-book/ch-11-publishing-feeds.md | 348 -------------- .idea/gtfs-realtime-book/ch-12-conclusion.md | 29 -- .../images/GTFS-realtime-consumption.png | Bin 34733 -> 0 bytes .../GTFS-realtime-direct-or-intermediate.png | Bin 27796 -> 0 bytes .../images/GTFS-realtime-structure.png | Bin 44185 -> 0 bytes .idea/gtfs-realtime-book/images/Modified-Feed.png | Bin 23445 -> 0 bytes .../images/bearing-from-position.png | Bin 672483 -> 0 bytes .idea/gtfs-realtime-book/images/boston-trip.png | Bin 520901 -> 0 bytes .idea/gtfs-realtime-book/images/mbta_bus.png | Bin 242072 -> 0 bytes .../gtfs-realtime-book/images/remaining_stops.png | Bin 190083 -> 0 bytes .idea/gtfs-realtime-book/images/stoptime_map.png | Bin 1140001 -> 0 bytes .../gtfs-realtime-book/images/vehicle-position.png | Bin 912061 -> 0 bytes gtfs-book/ch-00-definitive-guide-to-gtfs.md | 55 +++ gtfs-book/ch-01-introduction.md | 45 ++ gtfs-book/ch-02-agencies.md | 47 ++ gtfs-book/ch-03-stops.md | 117 +++++ gtfs-book/ch-04-routes.md | 82 ++++ gtfs-book/ch-05-trips.md | 222 +++++++++ gtfs-book/ch-06-stop-times.md | 160 +++++++ gtfs-book/ch-07-calendar.md | 138 ++++++ gtfs-book/ch-08-fares.md | 115 +++++ gtfs-book/ch-09-shapes.md | 76 +++ gtfs-book/ch-10-frequencies.md | 98 ++++ gtfs-book/ch-11-stop-transfers.md | 65 +++ gtfs-book/ch-12-feed-information.md 
| 36 ++ gtfs-book/ch-13-importing-to-sql.md | 125 +++++ gtfs-book/ch-14-switching-to-integer-ids.md | 111 +++++ gtfs-book/ch-15-optimizing-shapes.md | 158 +++++++ gtfs-book/ch-16-deleting-unused-data.md | 106 +++++ gtfs-book/ch-17-searching-for-trips.md | 346 ++++++++++++++ gtfs-book/ch-18-working-with-trip-blocks.md | 109 +++++ gtfs-book/ch-19-calculating-fares.md | 296 ++++++++++++ gtfs-book/ch-20-trip-patterns.md | 141 ++++++ gtfs-book/ch-21-conclusion.md | 23 + gtfs-book/images/agency-google-maps.png | Bin 0 -> 348035 bytes gtfs-book/images/block-01.png | Bin 0 -> 20153 bytes gtfs-book/images/block-02.png | Bin 0 -> 19817 bytes gtfs-book/images/block-03.png | Bin 0 -> 19154 bytes gtfs-book/images/block-04.png | Bin 0 -> 20673 bytes gtfs-book/images/orthogonal-distance.jpeg | Bin 0 -> 23287 bytes gtfs-book/images/shape-original.jpeg | Bin 0 -> 402743 bytes gtfs-book/images/shape-reduced.jpeg | Bin 0 -> 401253 bytes gtfs-book/images/stop-times.png | Bin 0 -> 454610 bytes gtfs-book/images/stops-sample.png | Bin 0 -> 129647 bytes .../ch-00-definitive-guide-to-gtfs-realtime.md | 58 +++ gtfs-realtime-book/ch-01-introduction.md | 81 ++++ gtfs-realtime-book/ch-02-service-alerts.md | 363 +++++++++++++++ gtfs-realtime-book/ch-03-vehicle-positions.md | 317 +++++++++++++ gtfs-realtime-book/ch-04-trip-updates.md | 217 +++++++++ gtfs-realtime-book/ch-05-protocol-buffers.md | 379 +++++++++++++++ .../ch-06-consuming-service-alerts.md | 305 ++++++++++++ .../ch-07-consuming-vehicle-positions.md | 509 +++++++++++++++++++++ gtfs-realtime-book/ch-08-consuming-trip-updates.md | 380 +++++++++++++++ gtfs-realtime-book/ch-09-database-storage.md | 492 ++++++++++++++++++++ gtfs-realtime-book/ch-10-extensions.md | 374 +++++++++++++++ gtfs-realtime-book/ch-11-publishing-feeds.md | 348 ++++++++++++++ gtfs-realtime-book/ch-12-conclusion.md | 29 ++ .../images/GTFS-realtime-consumption.png | Bin 0 -> 34733 bytes .../GTFS-realtime-direct-or-intermediate.png | Bin 0 -> 27796 bytes 
.../images/GTFS-realtime-structure.png | Bin 0 -> 44185 bytes gtfs-realtime-book/images/Modified-Feed.png | Bin 0 -> 23445 bytes .../images/bearing-from-position.png | Bin 0 -> 672483 bytes gtfs-realtime-book/images/boston-trip.png | Bin 0 -> 520901 bytes gtfs-realtime-book/images/mbta_bus.png | Bin 0 -> 242072 bytes gtfs-realtime-book/images/remaining_stops.png | Bin 0 -> 190083 bytes gtfs-realtime-book/images/stoptime_map.png | Bin 0 -> 1140001 bytes gtfs-realtime-book/images/vehicle-position.png | Bin 0 -> 912061 bytes 110 files changed, 6523 insertions(+), 6523 deletions(-) delete mode 100644 .idea/gtfs-book/ch-00-definitive-guide-to-gtfs.md delete mode 100644 .idea/gtfs-book/ch-01-introduction.md delete mode 100644 .idea/gtfs-book/ch-02-agencies.md delete mode 100644 .idea/gtfs-book/ch-03-stops.md delete mode 100644 .idea/gtfs-book/ch-04-routes.md delete mode 100644 .idea/gtfs-book/ch-05-trips.md delete mode 100644 .idea/gtfs-book/ch-06-stop-times.md delete mode 100644 .idea/gtfs-book/ch-07-calendar.md delete mode 100644 .idea/gtfs-book/ch-08-fares.md delete mode 100644 .idea/gtfs-book/ch-09-shapes.md delete mode 100644 .idea/gtfs-book/ch-10-frequencies.md delete mode 100644 .idea/gtfs-book/ch-11-stop-transfers.md delete mode 100644 .idea/gtfs-book/ch-12-feed-information.md delete mode 100644 .idea/gtfs-book/ch-13-importing-to-sql.md delete mode 100644 .idea/gtfs-book/ch-14-switching-to-integer-ids.md delete mode 100644 .idea/gtfs-book/ch-15-optimizing-shapes.md delete mode 100644 .idea/gtfs-book/ch-16-deleting-unused-data.md delete mode 100644 .idea/gtfs-book/ch-17-searching-for-trips.md delete mode 100644 .idea/gtfs-book/ch-18-working-with-trip-blocks.md delete mode 100644 .idea/gtfs-book/ch-19-calculating-fares.md delete mode 100644 .idea/gtfs-book/ch-20-trip-patterns.md delete mode 100644 .idea/gtfs-book/ch-21-conclusion.md delete mode 100644 .idea/gtfs-book/images/agency-google-maps.png delete mode 100644 .idea/gtfs-book/images/block-01.png delete mode 
100644 .idea/gtfs-book/images/block-02.png delete mode 100644 .idea/gtfs-book/images/block-03.png delete mode 100644 .idea/gtfs-book/images/block-04.png delete mode 100644 .idea/gtfs-book/images/orthogonal-distance.jpeg delete mode 100644 .idea/gtfs-book/images/shape-original.jpeg delete mode 100644 .idea/gtfs-book/images/shape-reduced.jpeg delete mode 100644 .idea/gtfs-book/images/stop-times.png delete mode 100644 .idea/gtfs-book/images/stops-sample.png delete mode 100644 .idea/gtfs-realtime-book/ch-00-definitive-guide-to-gtfs-realtime.md delete mode 100644 .idea/gtfs-realtime-book/ch-01-introduction.md delete mode 100644 .idea/gtfs-realtime-book/ch-02-service-alerts.md delete mode 100644 .idea/gtfs-realtime-book/ch-03-vehicle-positions.md delete mode 100644 .idea/gtfs-realtime-book/ch-04-trip-updates.md delete mode 100644 .idea/gtfs-realtime-book/ch-05-protocol-buffers.md delete mode 100644 .idea/gtfs-realtime-book/ch-06-consuming-service-alerts.md delete mode 100644 .idea/gtfs-realtime-book/ch-07-consuming-vehicle-positions.md delete mode 100644 .idea/gtfs-realtime-book/ch-08-consuming-trip-updates.md delete mode 100644 .idea/gtfs-realtime-book/ch-09-database-storage.md delete mode 100644 .idea/gtfs-realtime-book/ch-10-extensions.md delete mode 100644 .idea/gtfs-realtime-book/ch-11-publishing-feeds.md delete mode 100644 .idea/gtfs-realtime-book/ch-12-conclusion.md delete mode 100644 .idea/gtfs-realtime-book/images/GTFS-realtime-consumption.png delete mode 100644 .idea/gtfs-realtime-book/images/GTFS-realtime-direct-or-intermediate.png delete mode 100644 .idea/gtfs-realtime-book/images/GTFS-realtime-structure.png delete mode 100644 .idea/gtfs-realtime-book/images/Modified-Feed.png delete mode 100644 .idea/gtfs-realtime-book/images/bearing-from-position.png delete mode 100644 .idea/gtfs-realtime-book/images/boston-trip.png delete mode 100644 .idea/gtfs-realtime-book/images/mbta_bus.png delete mode 100644 .idea/gtfs-realtime-book/images/remaining_stops.png delete 
mode 100644 .idea/gtfs-realtime-book/images/stoptime_map.png delete mode 100644 .idea/gtfs-realtime-book/images/vehicle-position.png create mode 100644 gtfs-book/ch-00-definitive-guide-to-gtfs.md create mode 100644 gtfs-book/ch-01-introduction.md create mode 100644 gtfs-book/ch-02-agencies.md create mode 100644 gtfs-book/ch-03-stops.md create mode 100644 gtfs-book/ch-04-routes.md create mode 100644 gtfs-book/ch-05-trips.md create mode 100644 gtfs-book/ch-06-stop-times.md create mode 100644 gtfs-book/ch-07-calendar.md create mode 100644 gtfs-book/ch-08-fares.md create mode 100644 gtfs-book/ch-09-shapes.md create mode 100644 gtfs-book/ch-10-frequencies.md create mode 100644 gtfs-book/ch-11-stop-transfers.md create mode 100644 gtfs-book/ch-12-feed-information.md create mode 100644 gtfs-book/ch-13-importing-to-sql.md create mode 100644 gtfs-book/ch-14-switching-to-integer-ids.md create mode 100644 gtfs-book/ch-15-optimizing-shapes.md create mode 100644 gtfs-book/ch-16-deleting-unused-data.md create mode 100644 gtfs-book/ch-17-searching-for-trips.md create mode 100644 gtfs-book/ch-18-working-with-trip-blocks.md create mode 100644 gtfs-book/ch-19-calculating-fares.md create mode 100644 gtfs-book/ch-20-trip-patterns.md create mode 100644 gtfs-book/ch-21-conclusion.md create mode 100644 gtfs-book/images/agency-google-maps.png create mode 100644 gtfs-book/images/block-01.png create mode 100644 gtfs-book/images/block-02.png create mode 100644 gtfs-book/images/block-03.png create mode 100644 gtfs-book/images/block-04.png create mode 100644 gtfs-book/images/orthogonal-distance.jpeg create mode 100644 gtfs-book/images/shape-original.jpeg create mode 100644 gtfs-book/images/shape-reduced.jpeg create mode 100644 gtfs-book/images/stop-times.png create mode 100644 gtfs-book/images/stops-sample.png create mode 100644 gtfs-realtime-book/ch-00-definitive-guide-to-gtfs-realtime.md create mode 100644 gtfs-realtime-book/ch-01-introduction.md create mode 100644 
gtfs-realtime-book/ch-02-service-alerts.md create mode 100644 gtfs-realtime-book/ch-03-vehicle-positions.md create mode 100644 gtfs-realtime-book/ch-04-trip-updates.md create mode 100644 gtfs-realtime-book/ch-05-protocol-buffers.md create mode 100644 gtfs-realtime-book/ch-06-consuming-service-alerts.md create mode 100644 gtfs-realtime-book/ch-07-consuming-vehicle-positions.md create mode 100644 gtfs-realtime-book/ch-08-consuming-trip-updates.md create mode 100644 gtfs-realtime-book/ch-09-database-storage.md create mode 100644 gtfs-realtime-book/ch-10-extensions.md create mode 100644 gtfs-realtime-book/ch-11-publishing-feeds.md create mode 100644 gtfs-realtime-book/ch-12-conclusion.md create mode 100644 gtfs-realtime-book/images/GTFS-realtime-consumption.png create mode 100644 gtfs-realtime-book/images/GTFS-realtime-direct-or-intermediate.png create mode 100644 gtfs-realtime-book/images/GTFS-realtime-structure.png create mode 100644 gtfs-realtime-book/images/Modified-Feed.png create mode 100644 gtfs-realtime-book/images/bearing-from-position.png create mode 100644 gtfs-realtime-book/images/boston-trip.png create mode 100644 gtfs-realtime-book/images/mbta_bus.png create mode 100644 gtfs-realtime-book/images/remaining_stops.png create mode 100644 gtfs-realtime-book/images/stoptime_map.png create mode 100644 gtfs-realtime-book/images/vehicle-position.png diff --git a/.idea/gtfs-book/ch-00-definitive-guide-to-gtfs.md b/.idea/gtfs-book/ch-00-definitive-guide-to-gtfs.md deleted file mode 100644 index 2c32c67..0000000 --- a/.idea/gtfs-book/ch-00-definitive-guide-to-gtfs.md +++ /dev/null @@ -1,55 +0,0 @@ -# The Definitive Guide to GTFS - -*Consuming open public transportation data with the General Transit Feed -Specification* - -Originally written by Quentin Zervaas. - -## About This Book - -This book is a comprehensive guide to GTFS -- the General Transit Feed -Specification. It is comprised of two main sections. 
- -The first section describes what GTFS is and provides details about the -specification itself. In addition to this it also provides various -discussion points and things to consider for each of the files in the -specification. - -The second section covers a number of topics that relate to actually -using GTFS feeds, such as how to calculate fares, how to search for -trips, how to optimize feed data and more. - -This book is written for developers that are using transit data for web -sites, mobile applications and more. It aims to be as language-agnostic -as possible, but uses SQL to demonstrate concepts of extracting data -from a GTFS feed. - -### About The Author - -Quentin Zervaas is a software developer from Adelaide, Australia. - -Quentin was the founder of TransitFeeds (now ), a web -site that provides a comprehensive listing of public transportation data -available around the world. This site is referenced various times -throughout this book. - -### Credits - -First Edition. Published in February 2014. - -**Technical Reviewer** - -Rupert Hanson - -**Copy Editor** - -Miranda Little - -**Disclaimer** - -The information in this book is distributed on an "as is" basis, without -warranty. Although every precaution has been taken in the preparation of -this work, the author shall not be liable to any person or entity with -respect to any loss or damage caused or alleged to be caused directly or -indirectly by the information contained in this book. - diff --git a/.idea/gtfs-book/ch-01-introduction.md b/.idea/gtfs-book/ch-01-introduction.md deleted file mode 100644 index 3b3765c..0000000 --- a/.idea/gtfs-book/ch-01-introduction.md +++ /dev/null @@ -1,45 +0,0 @@ -## 1. Introduction to GTFS - -GTFS (General Transit Feed Specification) is a data standard developed -by Google used to describe a public transportation system. 
Its primary -purpose was to enable public transit agencies to upload their schedules -to Google Transit so that users of Google Maps could easily figure out -which bus, train, ferry or otherwise to catch. - -> **A GTFS feed is a ZIP file that contains a series of CSV files that -list routes, stops and trips in a public transportation system.** - -This book examines GTFS in detail, including which data from a public -transportation system can be represented, how to extract data, and -explores some more advanced techniques for optimizing and querying data. - -The official GTFS specification has been referenced a number of times in -this book. It is strongly recommended you are familiar with it. You can -view it at the following URL: - -<https://developers.google.com/transit/gtfs/reference> - -### Structure of a GTFS Feed - -A GTFS feed is a series of CSV files, which means that it is trivial to -include additional files in a feed. Additionally, files required as part -of the specification can also include additional columns. For this -reason, feeds from different agencies generally include different levels -of detail. - -***Note:** The files in a GTFS feed are CSV files, but use a file -extension of `.txt`.* - -A GTFS feed can be described as follows: - -> **A GTFS feed has one or more routes. Each route (`routes.txt`) has one or -more trips (`trips.txt`). Each trip visits a series of stops (`stops.txt`) -at specified times (`stop_times.txt`). Trips and stop times only contain -time of day information; the calendar is used to determine on which days -a given trip runs (`calendar.txt` and `calendar_dates.txt`).** - -The following chapters cover the main files that are included in all -GTFS feeds. For each file, the main columns are covered, as well as -optional columns that can be included. This book also covers some of the -unofficial columns that some agencies choose to include. 
- diff --git a/.idea/gtfs-book/ch-02-agencies.md b/.idea/gtfs-book/ch-02-agencies.md deleted file mode 100644 index cb837ae..0000000 --- a/.idea/gtfs-book/ch-02-agencies.md +++ /dev/null @@ -1,47 +0,0 @@ -## 2. Agencies (agency.txt) - -*This file is ***required*** to be included in GTFS feeds.* - -The `agency.txt` file is used to represent the agencies that provide -data for this feed. If there are routes -from multiple agencies included, then records in `routes.txt` make -reference to agencies in this file. - -| Field | Required? | Description | -| :----------------------------------------------------- | :--------: | :-------- | -| `agency_id` | Optional | An ID that uniquely identifies a single transit agency in the feed. If a feed only contains routes for a single agency then this value is optional. | -| `agency_name` | Required | The full name of the transit agency. | -| `agency_url` | Required | The URL of the transit agency. Must be a complete URL only, beginning with `http://` or `https://`. | -| `agency_timezone` | Required | Time zone of agency. All times in `stop_times.txt` use this time zone, unless overridden by its corresponding stop. All agencies in a single feed must use the same time zone. Example: **America/New_York** (See for more examples) | -| `agency_lang` | Required | Contains a two-letter ISO-639-1 code (such as `en` or `EN` for English) for the language used in this feed. | -| `agency_phone` | Optional | A single voice telephone number for the agency that users can dial if required. | -| `agency_fare_url` | Optional | A URL that describes fare information for the agency. Must be a complete URL only, beginning with `http://` or `https://`. | - -### Sample Data - -The following extract is taken from the GTFS feed of TriMet (Portland, -USA), located at . 
- -| `agency_name` | `agency_url` | `agency_timezone` | `agency_lang` | `agency_phone` | -| :------------ | :------------------------------------------- | :-------------------- | :------------ | :--------------- | -| `TriMet` | `[https://trimet.org](https://trimet.org/)` | `America/Los_Angeles` | `en` | `(503) 238-7433` | - -In this example, the `agency_id` column is included, but as there is -only a single entry the value can be empty. This means the `agency_id` -column in `routes.txt` also is not required. - -### Discussion - -The data in this file is typically used to provide additional -information to users of your app or web site in case schedules derived -from the rest of this feed are not sufficient (or in the case of -`agency_fare_url`, an easy way to provide a reference point to users -if the fare information in the feed is not being used). - -If you refer to the following screenshot, taken from Google Maps, you -can see the information from `agency.txt` represented in the -lower-left corner as an example of how it can be used. - -![GTFS agency](images/agency-google-maps.png) - - diff --git a/.idea/gtfs-book/ch-03-stops.md b/.idea/gtfs-book/ch-03-stops.md deleted file mode 100644 index cd8dedb..0000000 --- a/.idea/gtfs-book/ch-03-stops.md +++ /dev/null @@ -1,117 +0,0 @@ -## 3. Stops & Stations (stops.txt) - -*This file is ***required*** to be included in GTFS feeds.* - -The individual locations where vehicles pick up or drop off passengers -are represented by `stops.txt`. Records in this file are referenced in -`stop_times.txt`. A record in this file can be either a stop or a -station. A station has one or more child stops, as indicated using the -`parent_station` value. Entries that are marked as stations may not -appear in `stop_times.txt`. - -| Field | Required? | Description | -| :----- | :-------- | :---------- | -| `stop_id` | Required | An ID to uniquely identify a stop or station. 
| -| `stop_code` | Optional | A number or short string used to identify a stop to passengers. This is typically displayed at the physical stop or on printed schedules. | -| `stop_name` | Required | The name of the stop as passengers know it by. | -| `stop_desc` | Optional | A description of the stop. If provided, this should provide additional information to the `stop_name` value. | -| `stop_lat` | Required | The latitude of the stop (a number in the range of `-90` to `90`). | -| `stop_lon` | Required | The longitude of the stop (a number in the range of `-180` to `180`). | -| `zone_id` | Optional | This is an identifier used to calculate fares. A single zone ID may appear in multiple stops, but is ignored if the stop is marked as a station. | -| `stop_url` | Optional | A URL that provides information about this stop. It should be specific to this stop and not simply link to the agency's web site. | -| `location_type` | Optional | Indicates if a record is a stop or station. `0` or blank means a stop, `1` means a station. | -| `parent_station` | Optional | If a record is marked as a stop and has a parent station, this contains the ID of the parent (the parent must have a `location_type` of `1`). | -| `stop_timezone` | Optional | If a stop is located in a different time zone to the one specified in `agency.txt`, then it can be overridden here. | -| `wheelchair_boarding` | Optional | A value of `1` indicates it is possible for passengers in wheelchairs to board or alight. A value of `2` means the stop is not wheelchair accessible, while `0` or an empty value means no information is available. If the stop has a parent station, then 0 or an empty value means to inherit from its parent. | - -### Sample Data - -The following extract is taken from the TriMet GTFS feed -(). 
- -| `stop_id` | `stop_code` | `stop_name` | `stop_lat` | `stop_lon` | `stop_url` | -| :-------- | :----------- | :------------------ | :---------- | :------------ | :---------------------------------------------------| -| `2` | `2` | `A Ave & Chandler` | `45.420595` | `-122.675676` |` ` | -| `3` | `3` | `A Ave & Second St` | `45.419386` | `-122.665341` |` ` | -| `4` | `4` | `A Ave & 10th St` | `45.420703` | `-122.675152` |` ` | -| `6` | `6` | `A Ave & 8th St` | `45.420217` | `-122.67307` |` ` | - -The following diagram shows how these points look if you plot them onto -a map. - -![Stops](images/stops-sample.png) - -In this extract, TriMet use the same value for stop IDs and stop codes. -This is useful, because it means the stop IDs are stable (that is, they -do not change between feed versions). This means that if you want to -save a particular stop (for instance, if a user wants to save a -"favorite stop") you can trust that saving the ID will get the job done. - -**Note:** This is not always the case though, which means you may have -to save additional information if you want to save a stop. For instance, -you may need to save the coordinates or the list of routes a stop serves -so you can find it again if the stop ID has changed in a future version -of the feed. - -### Stops & Stations - -Specifying an entry in this file as a *station* is typically used when -there are many stops located within a single physical entity, such as a -train station or bus depot. While many feeds do not offer this -information, some large train stations may have up to 20 or 30 -platforms. - -Knowing the platform for a specific trip is extremely useful, but if a -passenger wants to select a starting point for their trip, showing them -a list of platforms may be confusing. 
- -Passenger: - -**"I want to travel from *Central Station* to *Airport Station*."** - -Web site / App: - -**"Board at *Central Station platform 5*, disembark at *Airport Station platform 1*."** - -In this example, the passenger selects the parent station, but they are -presented with the specific stop so they know exactly where within the -station they need to embark or disembark. - -### Wheelchair Accessibility - -If you are showing wheelchair accessibility information, it is important -to differentiate between "no access" and "no information", as knowing a -stop is not accessible is as important as knowing it is. - -If a stop is marked as being wheelchair accessible, you must check that -trips that visit the stop are also accessible (using the -`wheelchair_accessible` field in `trips.txt`). If the value in -`trips.txt` is blank, `0` or `1` then it is safe to assume the -trip can be accessed. If the stop is accessible and the trip is not, -then passengers in wheelchairs cannot use the trip. - -### Stop Features - -One of the proposed changes to GTFS is the addition of a file called -`stop_features.txt`. This is used to define characteristics about -stops. The great thing about this file is that it allows you to indicate -to users when a stop has a ticket machine, bike storage, lighting, or an -electronic display with real-time information. - -TriMet is one of the few agencies including this file. The following is -a sample of this file. - -| `stop_id` | `feature_type` | -| :-------- | :------------- | -| `61` | `4110` | -| `61` | `2310` | -| `61` | `5200` | - -This data indicate that stop `61` (NE Alberta & 24th) has a *Printed -Schedule Display* (`4110`), a *Bike Rack* (`2310`) and a *Street -Light* (`5200`). - -For more information about this proposal and a list of values and their -meanings, refer to -. 
- diff --git a/.idea/gtfs-book/ch-04-routes.md b/.idea/gtfs-book/ch-04-routes.md deleted file mode 100644 index 2e9716e..0000000 --- a/.idea/gtfs-book/ch-04-routes.md +++ /dev/null @@ -1,82 +0,0 @@ -## 4. Routes (routes.txt) - -*This file is ***required*** to be included in GTFS feeds.* - -A route is a group of trips that are displayed to riders as a single -service. - -| Field | Required? | Description | -| :----- | :-------- | :---------- | -| `route_id` | Required | An ID that uniquely identifies the route. | -| `agency_id` | Optional | The ID of the agency a route belongs to, as it appears in `agency.txt`. Only required if there are multiple agencies in the feed. | -| `route_short_name` | Required | A nickname or code to represent this service. If this is left empty then the `route_long_name` must be included. | -| `route_long_name` | Required | The route full name. If this is left empty then the `route_short_name` must be included. | -| `route_desc` | Optional | A description of the route, such as where and when the route operates. | -| `route_type` | Required | The type of transportation used on a route (such as bus, train or ferry). See below for more information. | -| `route_url` | Optional | A URL of a web page that describes this particular route. | -| `route_color` | Optional | If applicable, a route can have a color assigned to it. This is useful for systems that use colors to identify routes. This value is a six-character hexadecimal number (for example, `FF0000` is red). | -| `route_text_color` | Optional | For routes that specify the `route_color`, a corresponding text color should also be specified. | - -### Sample Data - -The following extract is taken from the TriMet GTFS feed -(). 
- -| `route_id` | `route_short_name` | `route_long_name` | `route_type` | -| :--------- | :----------------- | :--------------------------- | :----------- | -| `1` | `1` | `Vermont` | `3` | -| `4` | `4` | `Division / Fessenden` | `3` | -| `6` | `6` | `Martin Luther King Jr Blvd` | `3` | - -This sample shows three different bus routes for the greater Portland -area. The `route_type` value of `3` indicates they are buses. See -the next section for more information about route types in GTFS. - -There is no agency ID value in this feed, as TriMet is the only agency -represented in the feed. - -The other thing to note about this data is that TriMet use the same -value for both `route_id` and `route_short_name`. This is very -useful, because it means if you have a user that wants to save -information about a particular route you can trust the `route_id` -value. Unfortunately, this is not the case in all GTFS feeds. Sometimes, -the `route_id` value may change with every version of a feed (or at -least, semi-frequently). Additionally, some feeds may also have multiple -routes with the same `route_short_name`. This can present challenges -when trying to save user data. - -### Route Types - -To indicate a route's mode of transport, the `route_type` column is -used. - -| Value | Description | -| :---- | :---------------- | -| `0` | Tram / Light Rail | -| `1` | Subway / Metro | -| `2` | Rail | -| `3` | Bus | -| `4` | Ferry | -| `5` | Cable Car | -| `6` | Gondola | -| `7` | Funicular | - -Agencies may interpret the meaning of these route types differently. For -instance, some agencies specify their subway service as rail (value of -`2` instead of `1`), while some specify their trains as light rail -(`0` instead of `2`). - -These differences between agencies occur mainly because of the vague -descriptions for each of these route types. If you use Google Transit to -find directions, you may notice route types referenced that are -different to those listed above. 
This is because Google Transit also -supports additional route types. You can read more about these -additional route types at -. - -Very few GTFS feeds made available to third-party developers actually -make use of these values, but it is useful to know in case you come -across one that does. For instance, Sydney Buses include their school -buses with a route type of `712`, while other buses in the feed have -route type `700`. - diff --git a/.idea/gtfs-book/ch-05-trips.md b/.idea/gtfs-book/ch-05-trips.md deleted file mode 100644 index ee2b814..0000000 --- a/.idea/gtfs-book/ch-05-trips.md +++ /dev/null @@ -1,222 +0,0 @@ -## 5. Trips (trips.txt) - -*This file is ***required*** to be included in GTFS feeds.* - -The `trips.txt` file contains trips for each route. The specific stop -times are specified in `stop_times.txt`, and the days each trip runs -on are specified in `calendar.txt` and `calendar_dates.txt`. - -| Field | Required? | Description | -| :----- | :-------- | :---------- | -| `route_id` | Required | The ID of the route a trip belongs to as it appears in `routes.txt`. | -| `service_id` | Required | The ID of the service as it appears in `calendar.txt` or `calendar_dates.txt`, which identifies the dates on which a trip runs. | -| `trip_id` | Required | A unique identifier for a trip in this file. This value is referenced in `stop_times.txt` when specifying individual stop times. | -| `trip_headsign` | Optional | The text that appears to passengers as the destination or description of the trip. Mid-trip changes to the headsign can be specified in `stop_times.txt`. | -| `trip_short_name` | Optional | A short name or code to identify the particular trip, different to the route's short name. This may identify a particular train number or a route variation. | -| `direction_id` | Optional | Indicates the direction of travel for a trip, such as to differentiate between an inbound and an outbound trip. 
| -| `block_id` | Optional | A block is a series of trips conducted by the same vehicle. This ID is used to group 2 or more trips together. | -| `shape_id` | Optional | This value references a value from `shapes.txt` to define a shape for a trip. | -| `wheelchair_accessible` | Optional | `0` or blank indicates unknown, while `1` indicates the vehicle can accommodate at least one wheelchair passenger. A value of `2` indicates no wheelchairs can be accommodated. | - -### Sample Data - -Consider the following extract, taken from the `trips.txt` file of the -TriMet GTFS feed (). - -| `route_id` | `service_id` | `trip_id` | `direction_id` | `block_id` | `shape_id` | -| ---------- | ------------ | ----------- | -------------- | ---------- | ---------- | -| 1 | W.378 | 4282257 | 0 | 103 | 185327 | -| 1 | W.378 | 4282256 | 0 | 101 | 185327 | -| 1 | W.378 | 4282255 | 0 | 102 | 185327 | -| 1 | W.378 | 4282254 | 0 | 103 | 185327 | - -This data describes four individual trips for the "Vermont" bus route -(this was determined by looking up the `route_id` value in -`routes.txt`). While the values for `trip_id`, `block_id` and -`shape_id` are all integers in this particular instance, this is not a -requirement. Just like `service_id`, there may be non-numeric -characters. - -As each of these trips is for the same route and runs in the same -direction (based on `direction_id`) they can all be represented by the -same shape. Note however that this is not always the case as some -agencies may start or finish trips for a single route at different -locations depending on the time of the day. If this were the case, then -the trip's shape would differ slightly (and therefore have a different -shape to represent it in `shapes.txt`). - -Although this example does not include `trip_headsign`, many feeds do -include this value. This is useful for indicating to a passenger where -the trip is headed. 
When the trip headsign is not provided in the feed, -you can determine the destination by using the final stop in a trip. - -**Tip:** If you are determining the destination based on the final stop, -you can either present the stop name to the user, or you can -reverse-geocode the stop's coordinates to determine its locality. - -### Blocks - -In the preceding example, each trip has a value for `block_id`. The -first and the last trips here both have a `block_id` value of `103`. -This indicates that the same physical vehicle completes both of these -trips. As each of these trips goes in the same direction, it is likely -that they start at the same location. - -This means there is probably another trip in the feed for the same block -that exists between the trips listed here. It would likely travel from -the finishing point of the first trip (`4282257`) to the starting -point of the other trip (`4282254`). If you dig a little deeper in the -feed you will find the trip shown in the following table. - -| `route_id` | `service_id` | `trip_id` | `direction_id` | `block_id` | `shape_id` | -| :--------- | :----------- | :-------- | :------------- | :--------- | :--------- | -| 1 | W.378 | 4282270 | 1 | 103 | 185330 | - -This is a trip traveling in the opposite direction for the same block. -It has a different shape ID because it is traveling in the opposite -direction; a shape's points must advance in the same direction a trip's -stop times do. - -***Note:** You should perform some validation when grouping trips -together using block IDs. For instance, if trips share a block_id value -then they should also have the same service_id value. You should also -check that the times do not overlap; otherwise the same vehicle would be -unable to service both trips.* - -If you dig even further in this feed, there are actually seven different -trips all using block `103` for the **W.378** service period. This -roughly represents a full day's work for a single vehicle. 
- -For more discussion on blocks and how to utilize them effectively, refer -to *Working With Trip Blocks*. - -### Wheelchair Accessibility - -Similar to `stops.txt`, you can specify the wheelchair accessibility -of a specific trip using the `wheelchair_accessible` field. While many -feeds do not provide this information (often because vehicles in a fleet -can be changed at the last minute, so agencies do not want to guarantee -this information), your wheelchair-bound users will love you if you can -provide this information. - -As mentioned in the section on `stops.txt`, it is equally important to -tell a user that a specific vehicle cannot accommodate wheelchairs as to -when it can. Additionally, if the stops in a feed also have wheelchair -information, then both the stop and trip must be wheelchair accessible -for a passenger to be able to access a trip at the given stop. - -### Trip Direction - -One of the optional fields in `trips.txt` is `direction_id`, which -is used to indicate the general direction a vehicle is traveling. At -present the only possible values are `0` to represent "inbound" and -`1` to represent "outbound". There are no specific guidelines as to -what each value means, but the intention is that an inbound trip on a -particular route should be traveling in the opposite direction to an -outbound trip. - -Many GTFS feeds do not provide this information. In fact, there are a -handful of feeds that include two entries in `routes.txt` for each -route (one for each direction). - -One of the drawbacks of `direction_id` is that there are many routes -for which "inbound" or "outbound" do not actually mean anything. Many -cities have loop services that start and finish each trip at the same -location. Some cities have one or more loops that travel in both -directions (generally indicated by "clockwise loop" and -"counter-clockwise loop", or words to that effect). 
In these instances, -the `direction_id` can be used to determine which direction the route -is traveling. - -### Trip Short Name - -The `trip_short_name` field that appears in `trips.txt` is used to -provide a vehicle-specific code to a particular trip on a route. Based -on GTFS feeds that are currently in publication, it appears there are -two primary use-cases for this field: - -* Specifying a particular train number for all trains on a route -* Specifying a route "sub-code" for a route variant. - -### Specifying a Train Number - -For certain commuter rail systems, such as SEPTA in Philadelphia or MBTA -in Boston, each train has a specific number associated with it. This -number is particularly meaningful to passengers as trains on the same -route may have different stopping patterns or even different features -(for instance, only a certain train may have air-conditioning or -wheelchair access). - -Consider the following extract from SEPTA's rail feed -(). - -| `route_id` | `service_id` | `trip_id` | `trip_headsign` | `block_id` | `trip_short_name` | -| ---------- | ------------ | ------------ | ------------------------ | ---------- | ----------------- | -| AIR | S5 | AIR_1404_V25 | Center City Philadelphia | 1404 | 1404 | -| AIR | S1 | AIR_402_V5 | Center City Philadelphia | 402 | 402 | -| AIR | S1 | AIR_404_V5 | Center City Philadelphia | 404 | 404 | -| AIR | S5 | AIR_406_V25 | Center City Philadelphia | 406 | 406 | - -In this data, there are four different trains all heading to the same -destination. The `trip_short_name` is a value that can safely be -presented to users as it has meaning to them. In this case, you could -present the first trip to passengers as: - -**"Train 1404 on the Airport line heading to Center City -Philadelphia."** - -In this particular feed, SEPTA use the same value for -`trip_short_name` and for `block_id`, because the train number -belongs to a specific train. 
This means after it completes the trip to -Center City Philadelphia it continues on. In this particular feed, the -following trip also exists: - -**"Train 1404 on the Warminster line heading to Glenside."** - -You can therefore think of the `trip_short_name` value as a -"user-facing" version of `block_id`. - -### Specifying a Route Sub-Code - -The other use-case for `trip_short_name` is for specifying a route -sub-code. For instance, consider an agency that has a route with short -name `100` that travels from stop `S1` to stop `S2`. At night the -agency only has a limited number of routes running, so they extend this -route to also visit stop `S3` (so it travels from `S1` to `S2` -then to `S3`). As it is a minor variation of the main path, the agency -calls this trip `100A`. - -The agency could either create a completely separate entry in -`routes.txt` (so they would have `100` and `100A`), or they can -override the handful of trips in the evening by setting the -`trip_short_name` to `100A`. The following table shows how this -example might be represented. - -| `route_id` | `trip_id` | `service_id` | `trip_short_name` | -| ---------- | --------- | ------------ | ----------------- | -| 100 | T1 | C1 | | -| 100 | T2 | C1 | | -| 100 | T3 | C1 | | -| 100 | T4 | C1 | 100A | - -In this example the `trip_short_name` does not need to be set for the -first three trips as they use the `route_short_name` value from -`routes.txt`. - -### Specifying Bicycle Permissions - -A common field that appears in many GTFS feeds is -`trip_bikes_allowed`, which is used to indicate whether or not -passengers are allowed to take bicycles on board. This is useful for -automated trip planning when bicycle options can be included in the -results. - -The way this field works is similar to the wheelchair information; `0` -or empty means no information provided; `1` means no bikes allowed; -while `2` means at least one bike can be accommodated. 
- -**Note:** Unfortunately, this value is backwards when you compare it to -wheelchair accessibility fields. For more discussion on this matter, -refer to the topic on the Google Group for GTFS Changes -(). - diff --git a/.idea/gtfs-book/ch-06-stop-times.md b/.idea/gtfs-book/ch-06-stop-times.md deleted file mode 100644 index 9eb0f93..0000000 --- a/.idea/gtfs-book/ch-06-stop-times.md +++ /dev/null @@ -1,160 +0,0 @@ -## 6. Stop Times (stop_times.txt) - -*This file is ***required*** to be included in GTFS feeds.* - -The `stop_times.txt` file specifies individual stop arrivals and -departures for each trip. This file is typically the largest in a GTFS -feed as it contains many records that correspond to each entry in -`trips.txt`. - -| Field | Required? | Description | -| :----- | :-------- | :---------- | -| `trip_id` | Required | References a trip from `trips.txt`. This ID is referenced for every stop in a trip. | -| `arrival_time` | Required | The arrival time in `HH:MM:SS` format. Can be left blank, except for at least the first and last stop time in a trip. This value is typically the same as `departure_time`. | -| `departure_time` | Required | The departure time in `HH:MM:SS` format. Can be left blank, except for at least the first and last stop time in a trip. This value is typically the same as `arrival_time`. | -| `stop_id` | Required | References a single stop from `stops.txt`. | -| `stop_sequence` | Required | A unique number for a given trip to indicate the stopping order. Typically these values appear in order and increment by 1 for each stop time, but this is not always the case. | -| `stop_headsign` | Optional | This is text that appears to passengers at this stop to identify the trip's destination. It should only be used to override the `trip_headsign` value from `trips.txt`. | -| `pickup_type` | Optional | Indicates if passengers can be picked up at this stop. Sometimes a stop is drop-off only. 
| -| `drop_off_type` | Optional | Indicates if passengers can be dropped off at this stop. Sometimes a stop is pick-up only. | -| `shape_dist_traveled` | Optional | If a trip has an associated shape, this value indicates how far along that shape the vehicle has traveled when at this stop. Values in this file and `shapes.txt` must use the same unit. | - -### Sample Data - -Consider the following extract, taken from stop_times.txt in the TriMet -GTFS feed (). This represents the -first ten stops of a trip for bus route `1` ("Vermont") in Portland, as -covered in Sample Data for `trips.txt`. - -| `trip_id` | `arrival_time` | `departure_time` | `stop_id` | `stop_sequence` | `shape_dist_traveled` | -| :-------- | :------------- | :--------------- | :-------- | :-------------- | :-------------------- | -| 4282247 | 06:47:00 | 06:47:00 | 13170 | 1 | 0.0 | -| 4282247 | 06:48:18 | 06:48:18 | 7631 | 2 | 867.5 | -| 4282247 | 06:50:13 | 06:50:13 | 7625 | 3 | 2154.9 | -| 4282247 | 06:52:07 | 06:52:07 | 7612 | 4 | 3425.5 | -| 4282247 | 06:53:42 | 06:53:42 | 7616 | 5 | 4491.1 | -| 4282247 | 06:55:16 | 06:55:16 | 10491 | 6 | 5536.2 | -| 4282247 | 06:57:06 | 06:57:06 | 7588 | 7 | 6767.1 | -| 4282247 | 06:58:00 | 06:58:00 | 7591 | 8 | 7364.4 | -| 4282247 | 06:58:32 | 06:58:32 | 175 | 9 | 8618.7 | -| 4282247 | 06:58:50 | 06:58:50 | 198 | 10 | 9283.8 | - -If you were to plot the full trip on a map (including using its shape -file, as specified in `shapes.txt` and referenced in `trips.txt`), -it would look like the following diagram. The first stop is selected. - -![Stop Times](images/stop-times.png) - -### Arrival Time vs. Departure Time - -The first thing to notice about this data is that the values in -`arrival_time` and `departure_time` are the same. In reality, most -of the time these values are the same. The situation where these values -differ is typically when a vehicle is required to wait for a period of -time before departing. 
For instance: - -**"The train departs the domestic airport at 6:30am, arrives at the -international terminal at 6:35am. It waits for 10 minutes for passengers -who have just landed to board, then departs for the city at 6:45am."** - -While this is not typically something you need to worry about, be aware -that some feeds differ and the holdover time could be large. A person -rushing to a train wants to know the time it departs, while a husband -waiting at a stop to meet his wife wants to know what time it arrives. - -**Note:** In a situation where the difference between the arrival time -and departure is small, you may be better off always displaying the -earlier time to the user. The driver may view a one or two minute -holdover as an opportunity to keep on time, whereas a ten or fifteen -minute holdover is unlikely to be ignored as doing so would -significantly alter the schedule. - -### Scheduling Past Midnight - -One of the most important concepts to understand about -`stop_times.txt` is that times later than midnight can be specified. -For example, if a trip starts at 11:45 PM and takes an hour to complete, -its finishing time is 12:45 AM the next day. - -The `departure_time` value for the first stop is `23:45:00`, -while the `arrival_time` value for the final stop is `24:45:00`. -If you were to specify the final arrival time as `00:45:00`, it -would be referencing 12:45 AM prior to the trip's starting time. - -While you could use the `stop_sequence` to determine which day the -trip fell on, it would be impossible to do a quick search purely based -on the given final stop. - -A trip may start and finish after midnight without being considered as -part of the next day's service. Many transit systems shut down -overnight, but may have a few services that run after midnight until -about 1 AM or 2 AM. It is logical to group them all together with the -same service as earlier trips, which this functionality allows you to -do. 
- -However, this has implications when searching for trips. For instance, -if you want to find all trips between 12:00 AM and 1:00 AM on 30 January -2014, then you need to search: - -* Between `00:00:00` and `01:00:00` for trips with service on `20140130` -* Between `24:00:00` and `25:00:00` for trips with service on `20140129` - -In *Searching for Trips* you can see how to apply this to your -trip searches. - -### Time Points - -Most GTFS feeds provide arrival/departure times for every single stop. -In reality, most agencies do not have known times (or at least, they do -not publish times) for many of their stops. - -Typically, for routes that describe trains, subways or ferries that make -relatively few stops in a trip, all stops have a specified time. -However, often for bus routes that may make many stops in a single trip, -generally only the main stops have times shown on printed schedules. - -Typically, the bus drivers are required to meet these time points; if -they are ahead of schedule they might wait at a time point until the -scheduled time; if they are running late they might try to catch up in -order to adhere to the specified time point. - -This means that the intermediate points are likely estimates that have -been interpolated based on the amount of time between time points. If -there are multiple stops in-between the time points then the distance -between stops may also be used to calculate the estimate. - -In actual fact, GTFS feeds do not have to specify times for all stops. -The data in the following table is perfectly valid for a trip. 
- -| `trip_id` | `arrival_time` | `stop_id` | `stop_sequence` | `shape_dist_traveled` | -| :-------- | :------------- | :-------- | :-------------- | :-------------------- | -| T1 | 10:00:00 | S1 | 1 | 0 | -| T1 | | S2 | 2 | 1500 | -| T1 | | S3 | 3 | 3000 | -| T1 | 10:12:00 | S4 | 4 | 6000 | - -Based on this data, without taking into account the distance traveled, -you may estimate that the second stop arrives at 10:04 AM while the -third stop arrives at 10:08. - -If you consider the distance traveled, you might conclude the second -stop arrives at 10:03 AM while the third stop arrives at 10:06 AM. - -Some agencies include an additional column in `stop_times.txt` called -`timepoint`. This is used when they specify the times for all stops -but also want to indicate if only certain stops are guaranteed times. - -The following table shows how this would look using the previous data as -its basis. - -| `trip_id` | `arrival_time` | `stop_id` | `stop_sequence` | `shape_dist_traveled` | `timepoint` | -| :--------- | :------------- | :-------- | :-------------- | :-------------------- | :---------- | -| T1 | 10:00:00 | S1 | 1 | 0 | 1 | -| T1 | 10:03:00 | S2 | 2 | 1500 | 0 | -| T1 | 10:06:00 | S3 | 3 | 3000 | 0 | -| T1 | 10:12:00 | S4 | 4 | 6000 | 1 | - -This can be especially useful if you want to highlight these time points -so as to represent the printed schedules accurately, or even if you are -a transit agency just using the data for internal reporting. - diff --git a/.idea/gtfs-book/ch-07-calendar.md b/.idea/gtfs-book/ch-07-calendar.md deleted file mode 100644 index aebf131..0000000 --- a/.idea/gtfs-book/ch-07-calendar.md +++ /dev/null @@ -1,138 +0,0 @@ -## 7. Trip Schedules (calendar.txt & calendar_dates.txt) - -*Each of these files are ***optional*** in a GTFS feed, but at least one -of them is ***required***.* - -The `calendar.txt` file is used to indicate the range of dates on -which trips are running. 
It works by including a start date and a finish -date (typically a range of 3-6 months), then a marker for each day of -the week on which it operates. If there are single-day scheduling -changes that occur during this period, then the `calendar_dates.txt` -file can be used to override the schedule for each of these days. - -The following table shows the specification for `calendar.txt`. - -| Field | Required? | Description | -| :----- | :-------- | :---------- | -| `service_id` | Required | A unique ID for a single service. This value is referenced by trips in `trips.txt`. | -| `start_date` | Required | This indicates the start date for a given service, in `YYYYMMDD` format. | -| `end_date` | Required | This indicates the end date for a given service, in `YYYYMMDD` format. | -| `monday` | Required | Contains `1` if trips run on Mondays between the start and end dates, `0` or empty if not. | -| `tuesday` | Required | Contains `1` if trips run on Tuesdays between the start and end dates, `0` or empty if not. | -| `wednesday` | Required | Contains `1` if trips run on Wednesdays between the start and end dates, `0` or empty if not. | -| `thursday` | Required | Contains `1` if trips run on Thursdays between the start and end dates, `0` or empty if not. | -| `friday` | Required | Contains `1` if trips run on Fridays between the start and end dates, `0` or empty if not. | -| `saturday` | Required | Contains `1` if trips run on Saturdays between the start and end dates, `0` or empty if not. | -| `sunday` | Required | Contains `1` if trips run on Sundays between the start and end dates, `0` or empty if not. | - -As mentioned above, the `calendar_dates.txt` file is used to define -exceptions to entries in `calendar.txt`. For instance, if a 3-month -service is specified in `calendar.txt` and a holiday lies on a Monday -during this period, then you can use calendar_dates.txt to override this -single date. 
- -If the weekend schedule were used for a holiday, then you would add a -record to remove the regular schedule for the holiday date, and another -record to add the weekend schedule for the holiday date. - -Some feeds choose only to include `calendar_dates.txt` and not -`calendar.txt`, in which case there is an "add service" record for -every service and every date in this file. - -The following table shows the specification for `calendar_dates.txt`. - -| Field | Required? | Description | -| :----- | :-------- | :---------- | -| `service_id` | Required | The service ID that an exception is being defined for. This is referenced in both `calendar.txt` and in `trips.txt`. Unlike `calendar.txt`, it is possible for a `service_id` value to appear multiple times in this file. | -| `date` | Required | The date for which the exception is occurring, in `YYYYMMDD` format. | -| `exception_type` | Required | This indicates whether the exception is denoting an added service (`1`) or a removed service (`2`). | - -### Sample Data - -The following is an extract from the `calendar.txt` file in Adelaide -Metro's GTFS feed (). This -extract includes schedules from the start of 2014 until the end of March -2014. - -| `service_id` | `monday` | `tuesday` | `wednesday` | `thursday` | `friday` | `saturday` | `sunday` | `start_date` | `end_date` | -| :----------- | :------- | :-------- | :---------- | :--------- | :------- | :--------- | :------- | :----------- | :--------- | -| 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 20140102 | 20140331 | -| 11 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 20140102 | 20140331 | -| 12 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 20140102 | 20140331 | - -Any trip in the corresponding `trips.txt` file with a `service_id` -value of `1` runs from Monday to Friday. Trips with a `service_id` -of `11` run only on Saturday, while those with `12` run only on -Sunday. - -Now consider an extract from `calendar_dates.txt` from the same feed, -as shown in the following table. 
- -| `service_id` | `date` | `exception_type` | -| ------------ | -------- | ---------------- | -| 1 | 20140127 | 2 | -| 1 | 20140310 | 2 | -| 12 | 20140127 | 1 | -| 12 | 20140310 | 1 | - -The first two rows mean that on January 27 and March 10 trips with -`service_id` of `1` are not running. The final two rows mean that on -those same dates trips with `service_id` of `12` are running. This -has the following meaning: - -**"On 27 January and 10 March, use the Sunday timetable instead of the -Monday-Friday timetable."** - -In Adelaide, these two dates are holidays (Australia Day and Labour -Day). It is Adelaide Metro's policy to run their Sunday timetable on -public holidays, which is reflected by the above records in their -`calendar_dates.txt` file. - -### Structuring Services - -The case described above is the ideal case for specifying services in a -GTFS feed (dates primarily specified in `calendar.txt` with a handful -of exceptions in `calendar_dates.txt`). - -Be aware that there are two other major ways that services are specified -in feeds. - -1. Using only `calendar_dates.txt` and expressly including every - single date within the service range. Each of these is included as - "service added" (an `exception_type` value of `1`). The - following table shows how this might look. - -| `service_id` | `date` | `exception_type` | -| :----------- | :------- | :--------------- | -| 1 | 20140102 | 1 | -| 1 | 20140103 | 1 | -| 11 | 20140104 | 1 | -| 12 | 20140105 | 1 | - -2. Not using `calendar_dates.txt`, but creating many records in - `calendar.txt` instead to span various dates. The following table - shows how you can represent Monday-Friday from the sample data in - this fashion. 
- -| `service_id` | `monday` | `tuesday` | `wednesday` | `thursday` | `friday` | `saturday` | `sunday` | `start_date` | `end_date` | -| :----------- | :------- | :-------- | :---------- | :--------- | :------- | :--------- | :------- | :----------- | :--------- | -| 1a | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 20140102 | 20140126 | -| holiday1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 20140127 | 20140127 | -| 1b | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 20140128 | 20140309 | -| holiday2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 20140310 | 20140310 | -| 1c | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 20140311 | 20140331 | - -In this example, each holiday has its own row in `calendar.txt` that -runs for a single day only. - -Refer to *Finding Service IDs* to see how to determine -services that are running for a given day. - -### Service Name - -There are a number of feeds that specify a column in `calendar.txt` -called `service_name`. This is used to give a descriptive name to each -service. For example, the Sedona Roadrunner in Arizona -() has services called -"Weekday Service", "Weekend Service" and "New Year's Eve Service". - diff --git a/.idea/gtfs-book/ch-08-fares.md b/.idea/gtfs-book/ch-08-fares.md deleted file mode 100644 index 88bcbea..0000000 --- a/.idea/gtfs-book/ch-08-fares.md +++ /dev/null @@ -1,115 +0,0 @@ -## 8. Fare Definitions (fare_attributes.txt & fare_rules.txt) - -*These files are ***optional*** in a GTFS feed, but any rules specified -must reference a fare attributes record.* - -These two files define the types of fares that exist in a system, -including their price and transfer information. The attributes of a -particular fare exist in `fare_attributes.txt`, which has the -following columns. - -| Field | Required? | Description | -| :----- | :-------- | :---------- | -| `fare_id` | Required | A value that uniquely identifies a fare listed in this file. It must only appear once in this file. | -| `price` | Required | This field specifies the cost of the fare. 
For instance, if a trip costs $2 USD, then this value should be either `2.00` or `2`, and the `currency_type` should be `USD`. | -| `currency_type` | Required | This is the currency code that `price` is specified in, such as USD for United States Dollar. | -| `payment_method` | Required | This indicates when the fare is to be paid. `0` means it can be paid on board, while `1` means it must be paid before boarding. | -| `transfers` | Required | The number of transfers that may occur. This must either be empty (unlimited transfers), or the number of transfers. | -| `transfer_duration` | Optional | This is the number of seconds a transfer is valid for. | - -The following table shows the specification for `fare_rules.txt`, -which defines the rules used to apply a fare to a particular trip. - -| Field | Required? | Description | -| :----- | :-------- | :---------- | -| `fare_id` | Required | This is the ID of the fare that a rule applies to as it appears in `fare_attributes.txt`. | -| `route_id` | Optional | The ID of a route as it appears in `routes.txt` for which this rule applies. If there are several routes with the same fare attributes, there may be a row in `fare_rules.txt` for each route. | -| `origin_id` | Optional | This value corresponds to a `zone_id` value from `stops.txt`. If specified, this means a trip must begin in the given zone in order to qualify for this fare. | -| `destination_id` | Optional | This value corresponds to a `zone_id` value from `stops.txt`. If specified, this means a trip must end in the given zone in order to qualify for this fare. | -| `contains_id` | Optional | This value corresponds to a `zone_id` value from `stops.txt`. If specified, this means a trip must pass through every `contains_id` zone for the given fare (in other words, several rules may need to be checked). | - -Note that aside from `fare_id`, all fields are optional in this file. 
-This means some very complex rules can be made (especially when -transfers come into the calculation). The following URL has discussion -about different rules and some complex fare examples: - - - -Refer to *Calculating Fares* for discussion about the -algorithm for calculating fares for trips both with and without -transfers. - -### Sample Data - -The following data is taken from the TriMet GTFS feed -(). Firstly, the data from the -`fare_attributes.txt` file. - -| `fare_id` | `price` | `currency` | `payment_method` | `transfers` | `transfer_duration` | -| :-------- | :------ | :--------- | :--------------- | :---------- | :-------------------| -| B | 2.5 | USD | 0 | | 7200 | -| R | 2.5 | USD | 1 | | 7200 | -| BR | 2.5 | USD | 0 | | 7200 | -| RB | 2.5 | USD | 1 | | 7200 | -| SC | 1 | USD | 1 | 0 | | -| AT | 4 | USD | 1 | 0 | | -| VT | 0 | USD | 1 | 0 | | - -The data from the `fare_rules.txt` file is shown in the following -table. - -| `fare_id` | `route_id` | `origin_id` | `destination_id` | `contains_id` | -| :-------- | :--------- | :---------- | :--------------- | :------------ | -| B | | B | | B | -| R | | R | | R | -| BR | | B | | B | -| BR | | B | | R | -| RB | | R | | B | -| RB | | R | | R | -| SC | 193 | | | | -| SC | 194 | | | | -| AT | 208 | | | | -| VT | 250 | | | | - -In this sample data, TriMet have named some of their fares the same as -the zones specified in `stops.txt`. In this particular feed, bus stops -have a `zone_id` of `B`, while rail stops have `R`. - -The fares in this file are as follows: - -* Fare `B`. If you start at a bus stop (`zone_id` value of `B`), - you can buy your ticket on board (`payment_method` of `0`). You - may transfer an unlimited number of times (empty `transfers` value) for 2 hours - (`transfer_duration` of `7200`). The cost is $2.50 USD. -* Fare `R`. If you start at a rail stop, you must pre-purchase your - ticket. You may transfer an unlimited number of times to other rail - services for up to 2 hours. 
The cost is $2.50 USD. 
- -The `BR` fare describes a trip that begins on a bus then transfers to -a rail service (while `RB` is the opposite). This fare is not -matched if the passenger does not travel on both, as all `contains_id` -values must be matched in order to apply a fare. - -The other fares (`SC`, `AT` and `VT`) all apply to their -respective `route_id` values, regardless of start and finish stops. -Tickets must be pre-purchased, and transfers are not allowed. The `VT` -fare (which corresponds to TriMet's Vintage Trolley) is free to ride -since it has a price of `0`. - -### Assigning Fares to Agencies - -One of the extensions available to `fare_attributes.txt` is to include -an `agency_id` column. This is to limit a specific fare to only routes -from the specified agency, in the case where a feed has multiple -agencies. - -This is useful because there may be two agencies in a feed that define -fares with no specific rules (in other words, the fare applies to all -trips). If the price differs, then GTFS dictates that the cheapest fare -is always applied. Using `agency_id` means these fares can be -differentiated accordingly. - -For more information about this extension, refer to the Google Transit -GTFS Extensions page at -. - diff --git a/.idea/gtfs-book/ch-09-shapes.md b/.idea/gtfs-book/ch-09-shapes.md deleted file mode 100644 index bc05636..0000000 --- a/.idea/gtfs-book/ch-09-shapes.md +++ /dev/null @@ -1,76 +0,0 @@ -## 9. Trip Shapes (shapes.txt) - -*This file is ***optional*** in a GTFS feed.* - -Each trip in `trips.txt` can have a shape associated with it. The -shapes.txt file defines the points that make up an individual shape in -order to plot a trip on a map. Two or more records in `shapes.txt` -with the same `shape_id` value define a shape. - -The amount of data stored in this file can be quite large. In -*Optimizing Shapes* there are some strategies to efficiently -reduce the amount of shape data. - -| Field | Required? 
| Description | -| :----- | :-------- | :---------- | -| `shape_id` | Required | An ID to uniquely identify a shape. Every point for a shape contains the same value. | -| `shape_pt_lat` | Required | The latitude for a given point in the range of `-90` to `90`. | -| `shape_pt_lon` | Required | The longitude for a given point in the range of `-180` to `180`. | -| `shape_pt_sequence` | Required | A non-negative number that defines the ordering for points in a shape. A value must not be repeated within a single shape. | -| `shape_dist_traveled` | Optional | This value represents how far along a shape a particular point exists. This is a distance in a unit such as feet or kilometers. This unit must be the same as that used in `stop_times.txt`. | - -### Sample Data - -The following table shows a portion of a shape from the TriMet GTFS -feed. It is a portion of the shape that corresponds to the sample data -in the `stop_times.txt` section. - -| `shape_id` | `shape_pt_lat` | `shape_pt_lon` | `shape_pt_sequence` | `shape_dist_traveled` | -| :--------- | :------------- | :------------- | :------------------ | :-------------------- | -| 185328 | 45.52291 | -122.677372 | 1 | 0.0 | -| 185328 | 45.522921 | -122.67737 | 2 | 3.7 | -| 185328 | 45.522991 | -122.677432 | 3 | 34.0 | -| 185328 | 45.522992 | -122.677246 | 4 | 81.5 | -| 185328 | 45.523002 | -122.676567 | 5 | 255.7 | -| 185328 | 45.523004 | -122.676486 | 6 | 276.4 | -| 185328 | 45.523007 | -122.676386 | 7 | 302.0 | -| 185328 | 45.523024 | -122.675386 | 8 | 558.4 | -| 185328 | 45.522962 | -122.67538 | 9 | 581.0 | - -In this sample data, the `shape_dist_traveled` is listed in feet. -There is no way to specify in a GTFS feed which units are used for this -column -- it could be feet, miles, meters, kilometers. In actual fact, -it does not really matter, just as long as the units are the same as in -`stop_times.txt`. 
- -If you need to present a distance to your users (such as how far you -need to travel on a bus), you can calculate it instead by adding up the -distance between each point and formatting it based on the user's -locale settings. - -### Point Sequences - -In most GTFS feeds the `shape_pt_sequence` value starts at 1 and -increments by 1 for every subsequent point. Additionally, points are -typically listed in order of their sequence. - -You should not rely on these two statements though, as this is not a -requirement of GTFS. Many transit agencies have automated systems that -export their GTFS from a separate system, which can sometimes result in -an unpredictable output format. - -For instance, a shape that has points listed with the sequences `1`, `2`, -`9`, `18`, `7`, `3` is perfectly valid. - -### Distance Travelled - -The `shape_dist_traveled` column is used so you can programmatically -determine how much of a shape to draw when showing a map to users of -your web site or app. If you use techniques in *Optimizing Shapes* -to reduce the file size of shape data, then it becomes difficult to -use this value. - -Alternatively, you can calculate portions of shapes by determining which -point in a shape travels closest to the start and finish points of a -trip. - diff --git a/.idea/gtfs-book/ch-10-frequencies.md b/.idea/gtfs-book/ch-10-frequencies.md deleted file mode 100644 index 45021ab..0000000 --- a/.idea/gtfs-book/ch-10-frequencies.md +++ /dev/null @@ -1,98 +0,0 @@ -## 10. Repeating Trips (frequencies.txt) - -*This file is ***optional*** in a GTFS feed.* - -In some cases a route may repeat a particular stopping pattern every few -minutes (picture a subway line that runs every 5 minutes). Rather than -including entries in `trips.txt` and `stop_times.txt` for every -single occurrence, you can include the trip once then define rules for -it to repeat for a period of time.
- -Having a trip repeat only works in the case where the timing between -stops remains consistent for all stops. Using `frequencies.txt`, you -use the relative times between stops alongside a calculated starting -time for the trip in order to determine the specific stop times. - -| Field | Required? | Description | -| :----- | :-------- | :---------- | -| `trip_id` | Required | The ID of the trip as it appears in `trips.txt` that is being repeated. A single trip can appear multiple times for different time ranges. | -| `start_time` | Required | The time at which a given trip starts repeating, in `HH:MM:SS` format. | -| `end_time` | Required | The time at which a given trip stops repeating, in `HH:MM:SS` format. | -| `headway_secs` | Required | The time in seconds between departures from a given stop during the time range. | -| `exact_times` | Optional | Whether or not repeating trips should be exactly scheduled. See below for discussion. | - -### Sample Data - -The following sample data is taken from Société de transport de Montréal -(STM) in Montreal -(). - -| `trip_id` | `start_time` | `end_time` | `headway_secs` | -| :----------------------- | :----------- | :--------- | :------------- | -| 13S_13S_F1_1_2_0.26528 | 05:30:00 | 07:25:30 | 630 | -| 13S_13S_F1_1_6_0.34167 | 07:25:30 | 08:40:10 | 560 | -| 13S_13S_F1_1_10_0.42500 | 08:40:10 | 12:19:00 | 505 | -| 13S_13S_F1_1_7_0.58750 | 12:19:00 | 15:00:00 | 460 | -| 13S_13S_F1_1_11_0.66875 | 15:00:00 | 18:23:00 | 420 | -| 13S_13S_F1_1_5_0.78889 | 18:23:00 | 21:36:35 | 505 | - -Each of the trips listed here have corresponding entries in -`trips.txt` and `stop_times.txt` (more on that shortly). This data -can be interpreted as follows. - -* The first trip runs every 10m 30s from 5:30am until 7:25am. -* The second trip runs every 9m 20s from 7:25am until 8:40am, and so on. 
- -The following table shows some of the stop times for the first trip -(`departure_time` is omitted here for brevity, since it is identical -to `arrival_time`). - -| `trip_id` | `stop_id` | `arrival_time` | `stop_sequence` | -| :---------------------- | :-------- | :------------- | :-------------- | -| 13S_13S_F1_1_2_0.26528 | 18 | 06:22:00 | 1 | -| 13S_13S_F1_1_2_0.26528 | 19 | 06:22:59 | 2 | -| 13S_13S_F1_1_2_0.26528 | 20 | 06:24:00 | 3 | -| 13S_13S_F1_1_2_0.26528 | 21 | 06:26:00 | 4 | - -As this trip runs to the specified frequency, the specific times do not -matter. Instead, the differences are used. For the above stop times, -there is a 59 second gap between the first and second time, a 61 second -gap between the second and third, and a 120 second gap between the third -and fourth. - -The stop times for the first frequency record (10.5 minutes apart) can -be calculated as follows. - -* `05:30:00`, `05:30:59`, `05:32:00`, `05:34:00` -* `05:40:30`, `05:41:29`, `05:42:30`, `05:44:30` -* `05:51:00`, `05:51:59`, `05:53:00`, `05:55:00` -* ... -* `07:25:30`, `07:26:29`, `07:27:30`, `07:29:30` - -### Specifying Exact Times - -In the file definition at the beginning of this chapter there is an -optional field called `exact_times`. It may not be immediately clear -what this field means, so to explain it better, consider the frequency -definitions in the following table. - -| `trip_id` | `start_time` | `end_time` | `headway_secs` | `exact_times` | -| :-------- | :----------- | :--------- | :------------- | :------------ | -| T1 | 09:00:00 | 10:00:00 | 300 | 0 | -| T2 | 09:00:00 | 10:00:00 | 300 | 1 | - -These two frequencies are the same, with only the `exact_times` value -different. 
The first (`T1`) should be presented in a manner such as: - -**"Between 9 AM and 10 AM this trip departs every 5 minutes."** - -The second trip (`T2`) should be presented as follows: - -**"This trip departs at 9 AM, 9:05 AM, 9:10 AM, ..."** - -While ultimately the meaning is the same, this difference is used in -order to allow agencies to represent their schedules more accurately. -Often, schedules that convey to passengers that they will not have to -wait more than five minutes do so without having to explicitly list -every departure time. - diff --git a/.idea/gtfs-book/ch-11-stop-transfers.md b/.idea/gtfs-book/ch-11-stop-transfers.md deleted file mode 100644 index 6a43674..0000000 --- a/.idea/gtfs-book/ch-11-stop-transfers.md +++ /dev/null @@ -1,65 +0,0 @@ -## 11. Stop Transfers (transfers.txt) - -*This file is ***optional*** in a GTFS feed.* - -To define how passengers can transfer between routes at specific stops -feed providers can include `transfers.txt`. This does not mean -passengers cannot transfer elsewhere, but it does indicate if a transfer -is not possible between certain stops, or a minimum time required if -transfer is possible. - -| Field | Required? | Description | -| :----- | :-------- | :---------- | -| `from_stop_id` | Required | The ID of stop as it appears in `stops.txt` where the connection begins. If this references a station, then this rule applies to all stops within the station. | -| `to_stop_id` | Required | The ID of the stop as it appears in `stops.txt` where the connection between trips ends. If this references a station, then this rule applies to all stops within the station. | -| `transfer_type` | Required | `0` or blank means the recommended transfer point, `1` means the secondary vehicle will wait for the first, `2` means a minimum amount of time is required, `3` means transfer is not possible. | -| `min_transfer_time` | Optional | If the `transfer_type` value is `2` then this value must be specified. 
It indicates the number of seconds required to transfer between the given stops. | - -It is also possible that records in this file are specified for -ticketing reasons. For instance, some train stations are set up so that -passengers can transfer between routes without needing to validate their -ticket again or buy a transfer. Other stations that are shared between -those same routes might not have this open transfer area, thereby -requiring you to exit one route fully before buying another ticket to -access the second. - -### Sample Data - -The following table shows some sample transfer rules from TriMet in -Portland's GTFS feed (). - -| `from_stop_id` | `to_stop_id` | `transfer_type` | `min_transfer_time` | -| :------------- | :----------- | :-------------- | :------------------ | -| 7807 | 5020 | 0 | | -| 7807 | 7634 | 0 | | -| 7807 | 7640 | 0 | | - -These rules indicate that if you are transferring from a route that -visits stop `7807` to any route that visits the other stops (`5020`, -`7634` or `7640`), then this is the ideal place to do it. - -In other words, if there are other locations along the first route where -you could transfer to the second route, then those stops should not be -used. These rules say this is the best place to transfer. - -Consider the transfer rule in the following table, taken from the New -York City Subway GTFS feed (). - -| `from_stop_id` | `to_stop_id` | `transfer_type` | `min_transfer_time` | -| :------------- | :----------- | :-------------- | :------------------ | -| 121 | 121 | 2 | 180 | - -In this data, the MTA specifies how long it takes to transfer to -different platforms within the same station. The stop with ID 121 refers -to the 86th St station (as specified in `stops.txt`). It has a -`location_type` of `1` and two stops within it (`121N` and -`121S`). The above transfer rule says that if you need to transfer -from `121N` to `121S` (or vice-versa) then a minimum time of 3 -minutes (180 seconds) must be allocated. 
- -If you were to calculate the time taken to transfer using the -coordinates of each of these platforms, it would only take a few seconds -as they are physically close to each other. In reality though, you must -exit one platform then walk around and enter the other platform (often -having to use stairs). - diff --git a/.idea/gtfs-book/ch-12-feed-information.md b/.idea/gtfs-book/ch-12-feed-information.md deleted file mode 100644 index ffad2ab..0000000 --- a/.idea/gtfs-book/ch-12-feed-information.md +++ /dev/null @@ -1,36 +0,0 @@ -## 12. Feed Information (feed_info.txt) - -*This file is ***optional*** in a GTFS feed.* - -Feed providers can include additional information about a feed using -`feed_info.txt`. It should only ever have a single row (other than the -CSV header row). - -| Field | Required? | Description | -| :----- | :-------- | :---------- | -| `feed_publisher_name` | Required | The name of the organization that publishes the feed. This may or may not be the same as any agency in `agency.txt`. | -| `feed_publisher_url` | Required | The URL of the feed publisher's web site. | -| `feed_lang` | Required | This specifies the language used in the feed. If an agency also has a language specified, then the agency's value should override this value. | -| `feed_start_date` | Optional | This value is a date in `YYYYMMDD` format that asserts the data in this feed is valid from this date. If specified, it typically matches up with the earliest date in `calendar.txt` or `calendar_dates.txt`, but if it is earlier, this is explicitly saying there are no services running between this date and the earliest service date. | -| `feed_end_date` | Optional | This value is a date in `YYYYMMDD` format that asserts the data in this feed is valid until this date. 
If specified, it typically matches up with the latest date in `calendar.txt` or `calendar_dates.txt`, but if it is later, this is explicitly saying there are no services running between the latest service date and this date. | -| `feed_version` | Optional | A string that indicates the version of this feed. This can be useful to let feed publishers know whether the latest version of their feed has been incorporated. | - -### Sample Data - -The following sample data is taken from the GTFS feed of TriMet in -Portland (). - -| `feed_publisher_name` | `feed_publisher_url` | `feed_lang` | `feed_start_date` | `feed_end_date` | `feed_version` | -| :-------------------- | :-------------------------------------- | :---------- | :---------------- | :-------------- | :---------------- | -| TriMet | [http://trimet.org](http://trimet.org/) | en | | | 20140121-20140421 | - -In this example, TriMet do not include the start or end dates, meaning -you should derive the dates this feed is active for by the dates in -`calendar_dates.txt` (this particular feed does not have a -`calendar.txt` file). - -TriMet use date stamps to indicate the feed version. This feed was -published on 21 January 2014 and includes data up until 21 April 2014, -so it appears they use the first/last dates as a way to specify their -version. Each agency has its own method. - diff --git a/.idea/gtfs-book/ch-13-importing-to-sql.md b/.idea/gtfs-book/ch-13-importing-to-sql.md deleted file mode 100644 index e96517c..0000000 --- a/.idea/gtfs-book/ch-13-importing-to-sql.md +++ /dev/null @@ -1,125 +0,0 @@ -## 13. Importing a GTFS Feed to SQL - -One of the great things about GTFS is that it is already in a format -conducive to being used in an SQL database. The presence of various IDs -in each of the different files makes it easy to join the tables in order -to extract the data you require. - -To try this yourself, download `GtfsToSql` -().
This is a Java -command-line application that imports a GTFS feed to an SQLite database. -This application also supports PostgreSQL, but the examples used here -are for SQLite. - -The pre-compiled `GtfsToSql` Java archive can be downloaded from its -GitHub repository at -. - -To use `GtfsToSql`, all you need is an extracted GTFS feed. The -following instructions demonstrate how to import the TriMet feed -that has been referenced throughout this book. - -Firstly, download and extract the feed. The following commands use curl -to download the file, then unzip to extract the file to a sub-directory -called `trimet`. - -``` -$ curl http://developer.trimet.org/schedule/gtfs.zip > gtfs.zip - -$ unzip gtfs.zip -d trimet/ -``` - -To create an SQLite database from this feed, the following command can -be used. - -``` -$ java -jar GtfsToSql.jar -s jdbc:sqlite:./db.sqlite -g ./trimet -``` - -This may take a minute or two to complete (you will see progress as it -imports the feed and then creates indexes), and at the end you will have -a GTFS database in a file called `db.sqlite`. You can then query this -database with the command-line `sqlite3` tool, as shown in the -following example. - -``` -$ sqlite3 db.sqlite -sqlite> SELECT * FROM agency; -|TriMet|http://trimet.org|America/Los_Angeles|en|503-238-7433 - -sqlite> SELECT * FROM routes WHERE route_type = 0; -90|||MAX Red Line||0|http://trimet.org/schedules/r090.htm|| -100|||MAX Blue Line||0|http://trimet.org/schedules/r100.htm|| -190|||MAX Yellow Line||0|http://trimet.org/schedules/r190.htm|| -193||Portland Streetcar|NS Line||0|http://trimet.org/schedules/r193.htm|| -194||Portland Streetcar|CL Line||0|http://trimet.org/schedules/r194.htm|| -200|||MAX Green Line||0|http://trimet.org/schedules/r200.htm|| -250|||Vintage Trolley||0|http://trimet.org/schedules/r250.htm|| -``` - -The first query above finds all agencies stored in the database, while -the second finds all routes marked as Light Rail (`route_type` of -`0`).
- -**Note: In the following chapters there are more SQL examples. All of -these examples are geared towards running on an SQLite database that has -been created in this manner.** - -All tables in this database match up with the corresponding GTFS -filename (so for `agency.txt`, the table name is `agency`, while for -`stop_times.txt` the table name is `stop_times`). The columns in SQL -have the same name as the value in the corresponding GTFS file. - -***Note:** All data imported using this tool is stored as text in the -database. This means you may need to be careful when querying integer -data. For example, ordering stop times by stop_sequence may not produce -expected results (for instance, 29 as a string comes before 3). Although -it is a performance hit, you can change this behavior by casting the -value to integer, such as: ORDER BY stop_sequence + 0. The reason -`GtfsToSql` works in this way is that it is intended as a lightweight -tool to be able to quickly query GTFS data. I recommend rolling your own -importer to treat data exactly as you need it, especially in conjunction -with some of the optimization techniques recommended later in this book.* - -### File Encodings - -The GTFS specification does not indicate whether files should be encoded -using UTF-8, ISO-8859-1 or otherwise. Since a GTFS feed is not -necessarily in English, you must be willing to handle an extended -character set. - -The GtfsToSql tool introduced above automatically detects the encoding -of each file using the juniversalchardet Java library -(). - -I recommend you take some time looking at the source code of GtfsToSql -to further understand this so you are aware of handling encodings -correctly if you write your own parser. - -### Optimizing GTFS Feeds - -If you are creating a database that is to be distributed onto a mobile -device such as an iPhone or Android phone, then disk space and -computational power are at a premium.
Even if you are setting up a -database to be queried on a server only, making the database -perform as quickly as possible is still important. - -In the following chapters are techniques for optimizing GTFS feeds. -There are many techniques that can be applied to improve the performance -of GTFS, such as: - -* Using integer identifiers rather than string identifiers (for route - IDs, trip IDs, stop IDs, etc.) and creating appropriate indexes -* Removing redundant shape points and encoding shapes -* Deleting unused data -* Reusing repeating trip patterns. - -Changing the data to use integer IDs makes the greatest improvement to -performance, but the other techniques also help significantly. - -Depending on your needs, there are other optimizations that can be made -to reduce file size and speed up querying of the data, but the ease of -implementing them may depend on your database storage system and the -programming language used to query the data. The above list is a good -starting point. - diff --git a/.idea/gtfs-book/ch-14-switching-to-integer-ids.md b/.idea/gtfs-book/ch-14-switching-to-integer-ids.md deleted file mode 100644 index 4d3914a..0000000 --- a/.idea/gtfs-book/ch-14-switching-to-integer-ids.md +++ /dev/null @@ -1,111 +0,0 @@ -## 14. Switching to Integer IDs - -There are a number of instances in a GTFS feed where IDs are used, such -as to identify routes, trips, stops and shapes. There are no specific -guidelines in GTFS as to the type of data or length an ID can be. As -such, IDs in some GTFS feeds may be anywhere up to 30 or 40 characters -long. - -Using long strings as IDs is extremely inefficient as they make the size -of a database much larger than it needs to be, as well as making -querying the data much slower. - -To demonstrate, consider a GTFS feed where trip IDs are 30 characters -long. If there are 10,000 trips, each with an average of 30 stops in -`stop_times.txt`, then the IDs alone take up 9.3 MB of storage.
-Realistically speaking, you need to index the `trip_id` field in order -to look up a trip's stop times quickly, which uses even more space. - -The following SQL statements show how you might represent GTFS without -optimizing the identifiers. For brevity, not all fields from the GTFS -feed are included here. - -```sql -CREATE TABLE trips ( - trip_id TEXT, - route_id TEXT, - service_id TEXT -); - -CREATE INDEX trips_trip_id ON trips (trip_id); - -CREATE INDEX trips_route_id ON trips (route_id); - -CREATE INDEX trips_service_id ON trips (service_id); - -CREATE TABLE stop_times ( - trip_id TEXT, - stop_id TEXT, - stop_sequence INTEGER -); - -CREATE INDEX stop_times_trip_id ON stop_times (trip_id); - -CREATE INDEX stop_times_stop_id on stop_times (stop_id); -``` - -If you were to add an integer column to `trips` called, say, -`trip_index`, then you can reference that value from `stop_times` -instead of `trip_id`. The following SQL statements show this. - -```sql -CREATE TABLE trips ( - trip_id TEXT, - trip_index INTEGER, - route_id TEXT, - service_id TEXT -); - -CREATE INDEX trips_trip_id ON trips (trip_id); -CREATE INDEX trips_trip_index ON trips (trip_index); -CREATE INDEX trips_route_id ON trips (route_id); -CREATE INDEX trips_service_id ON trips (service_id); - -CREATE TABLE stop_times ( - trip_index INTEGER, - stop_id TEXT, - stop_sequence INTEGER -); - -CREATE INDEX stop_times_trip_index ON stop_times (trip_index); - -CREATE INDEX stop_times_stop_id on stop_times (stop_id); -``` - -This results in a significant space saving (when you consider how large -`stop_times` can be), as well as being far quicker to look up stop -times based on a trip ID. Note that the original `trip_id` value is -retained so it can be referenced if required. - -Without adding `trip_index`, you would use the following query to find -stop times given a trip ID. 
- -```sql -SELECT * FROM stop_times - WHERE trip_id = 'SOME_LONG_TRIP_ID' - ORDER BY stop_sequence; -``` - -With the addition of `trip_index`, you need to first find the record -in `trips`. This can be achieved using the following query. This is a -small sacrifice compared to performing string comparison on all stop -times. - -```sql -SELECT * FROM stop_times - WHERE trip_index = ( - SELECT trip_index FROM trips WHERE trip_id = 'SOME_LONG_TRIP_ID' - ) - ORDER BY stop_sequence; -``` - -You can make the same change for the other IDs in the feed, such as -`route_id` and `stop_id`. For these columns you still keep (and -index) the original values in `routes` and `stops` respectively, -since you may still need to look up records based on these values. - -***Note:** Even though this book recommends optimizing feeds in this -manner, the remainder of examples in this book only use their original -IDs, in order to simplify the examples and to ensure compatibility with -the `GtfsToSql` tool introduced previously.* - diff --git a/.idea/gtfs-book/ch-15-optimizing-shapes.md b/.idea/gtfs-book/ch-15-optimizing-shapes.md deleted file mode 100644 index fbb1125..0000000 --- a/.idea/gtfs-book/ch-15-optimizing-shapes.md +++ /dev/null @@ -1,158 +0,0 @@ -## 15. Optimizing Shapes - -Shape data in a GTFS feed (that is, the records from `shapes.txt`) -represents a large amount of data. There are a number of ways to reduce -this data, which can help to: - -* Speed up data retrieval -* Reduce the amount of data to transmit to app / web site users -* Speed up rendering of the shape onto a map (such as a native mobile - map or a JavaScript map). - -Two ways to reduce shape data are as follows: - -* **Reducing the number of points in a shape.** The shapes included in - GTFS are often very precise and include a number of redundant - points. Many of these can be removed without a noticeable loss of - shape quality using the *Douglas-Peucker Algorithm*. 
-* **Encoding all points in a shape into a single value.** The *Encoded - Polyline Algorithm* used in the Google Maps JavaScript API can also - be used with GTFS shapes. This reduces the amount of storage - required and also makes looking up all points in a shape far - quicker. - -### Reducing Points in a Shape - -Many of the shapes you find in GTFS feeds are extremely detailed. They -often follow the exact curvature of the road and may consist of hundreds -or thousands of points for a trip that might have only 30 or 40 stops. - -While this level of detail is useful, the sheer amount of data required -to be rendered on a map can be a massive performance hit from the -perspective of retrieving the data as well as rendering on a map. -Realistically, shapes do not need this much detail in order to convey -their message to your users. - -Consider the following shape from Portland that has been rendered using -Google Maps. The total shape consists of 1913 points. - -![Original Shape](images/shape-original.jpeg) - -Compare this now to the same shape that has had redundant points -removed. The total number of points in this shape is 175, which -represents about a 90% reduction. - -![Reduced Shape](images/shape-reduced.jpeg) - -If you look closely, you can see some minor loss of detail, but for the -most part, the shapes are almost identical. - -This reduction in points can be achieved using the Douglas-Peucker -Algorithm. It does so by discarding points that do not deviate -significantly from the line between their surrounding points. - -The Douglas-Peucker Algorithm works as follows: - -* Begin with the first and last points in the path (A and B). These - are always kept. -* Find the point between the first and last that is furthest away from - the line joining the first and last points (the orthogonal distance -- - see the figure below). -* If this point is further away than the allowed distance (the tolerance - level), the point is kept (call it X).
-* Repeat this algorithm twice: once using A as the first point and X - as the last point, then again using X as the first point and B as - the last point. - -This algorithm is recursive, and continues until all points have been -checked. - -***Note:** The tolerance level determines how aggressively points are -removed. A higher tolerance value is more aggressive and discards more -data, while a lower tolerance discards less data.* - -The following diagram shows what orthogonal distance means. - -![Orthogonal Distance](images/orthogonal-distance.jpeg) - -The following resources provide more information about the -Douglas-Peucker Algorithm and describe how to implement it in your own -systems: - -* -* -* . - -You can often discard about 80-90% of all shape data before seeing a -significant loss of line detail. - -### Encoding Shape Points - -A single entry in `shapes.txt` corresponds to a single point in a -single shape. Each entry includes a shape ID, a latitude and longitude. - -***Note:** The `shape_dist_traveled` field is also included, but you do not -strictly need to use this field (nor the corresponding field in -stop_times.txt). The technique described in this section will not work -if you intend to use `shape_dist_traveled`.* - -This means if you want to look up a shape by its ID, you may need to -retrieve several hundred rows from a database. Using the Encoded -Polyline Algorithm you can change your GTFS database so each shape is -represented by a single row in a database. This means the shape can be -found much more quickly and much less data needs to be processed to -determine the shape. - -Consider the following data, taken from TriMet's `shapes.txt` file. -This data represents the first five points of a shape.
- -| `shape_id` | `shape_pt_lat` | `shape_pt_lon` | `shape_pt_sequence` | `shape_dist_traveled` | -| :--------- | :------------- | :------------- | :------------------- | :--------------------- | -| `185328` | `45.52291` | `-122.677372` | `1` | `0.0` | -| `185328` | `45.522921` | `-122.67737` | `2` | `3.7` | -| `185328` | `45.522991` | `-122.677432` | `3` | `34.0` | -| `185328` | `45.522992` | `-122.677246` | `4` | `81.5` | -| `185328` | `45.523002` | `-122.676567` | `5` | `255.7` | - -If you apply the Encoded Polyline Algorithm to this data, the -coordinates can be represented using the following string. - -``` -eeztGrlwkVAAML?e@AgC -``` - -To learn how to arrive at this value, you can read up on the Encoded -Polyline Algorithm at -. - -Instead of having every single shape point in a single table, you can -create a table that has one record per shape. The following SQL -statement is a way you could achieve this. - -```sql -CREATE TABLE shapes ( - shape_id TEXT, - encoded_shape TEXT -); -``` - -The following table shows how this data could be represented in a -database. - -| `shape_id` | `encoded_shape` | -| :--------- | :--------------------- | -| `185328` | `eeztGrlwkVAAML?e@AgC` | - -Storing the shape in this manner means you can retrieve an entire shape -by looking up only one database row and running it through your decoder. - -To further demonstrate how both the encoding and decoding work, try out -the polyline utility at -. - -You can find implementations for encoding and decoding points for -various languages at the following locations: - -* -* - diff --git a/.idea/gtfs-book/ch-16-deleting-unused-data.md b/.idea/gtfs-book/ch-16-deleting-unused-data.md deleted file mode 100644 index f2a2fc0..0000000 --- a/.idea/gtfs-book/ch-16-deleting-unused-data.md +++ /dev/null @@ -1,106 +0,0 @@ -## 16. Deleting Unused Data - -Once you have imported a GTFS feed into a database, it is possible for -there to be a lot of redundant data.
This can ultimately slow down any -querying of that data as well as bloating the size of the database. If -you are making an app where the GTFS database is queried on the device -then disk space and computational time are at a premium, so you must do -what you can to reduce resource usage. - -The first thing to check for is expired services. You can do this by -searching `calendar.txt` for entries that expire before today's date. -Be aware though, you also need to ensure there are no -`calendar_dates.txt` entries overriding these services (a service -could have an `end_date` of, say, `20140110`, but also have a -`calendar_dates.txt` entry for `20140131`). - -Firstly, find service IDs in `calendar_dates.txt` that are still -active using the following query. - -```sql -SELECT service_id FROM calendar_dates WHERE date >= '20140110'; -``` - -***Note:** In order to improve performance of searching for dates, you -should import the date field in `calendar_dates.txt` as an integer, as -well as `start_date` and `end_date` in `calendar.txt`.* - -Any services matched in this query should not be removed. You can then -find service IDs in `calendar.txt` with the following SQL query. - -```sql -SELECT * FROM calendar WHERE end_date < '20140110' - AND service_id NOT IN ( - SELECT service_id FROM calendar_dates WHERE date >= '20140110' - ); -``` - -Before deleting these services, corresponding trips and stop times must -be removed since you need to know the service ID in order to delete a -trip. Likewise, stop times must be deleted before trips since you need -to know the trip IDs to be removed. - -```sql -DELETE FROM stop_times WHERE trip_id IN ( - SELECT trip_id FROM trips WHERE service_id IN ( - SELECT service_id FROM calendar WHERE end_date < '20140110' - AND service_id NOT IN ( - SELECT service_id FROM calendar_dates WHERE date >= '20140110' - ) - ) - ); -``` - -Now there may be a series of trips with no stop times. 
Rather than -repeating the above sub-queries, a more thorough way of removing trips -is to remove trips with no stop times. - -```sql -DELETE FROM trips WHERE trip_id NOT IN ( - SELECT DISTINCT trip_id FROM stop_times -); -``` - -With all `service_id` references removed, you can remove the expired -rows from `calendar.txt` using the following SQL query. - -```sql -DELETE FROM calendar WHERE end_date < '20140110' - AND service_id NOT IN ( - SELECT DISTINCT service_id FROM calendar_dates WHERE date >= '20140110' - ); -``` - -The expired rows in `calendar_dates.txt` can also be removed, which -can be achieved using the following query. - -```sql -DELETE FROM calendar_dates WHERE date < '20140110'; -``` - -There may now be some stops that are not used by any trips. These can be -removed using the following query. - -```sql -DELETE FROM stops WHERE stop_id NOT IN ( - SELECT DISTINCT stop_id FROM stop_times -); -``` - -Additionally, you can remove unused shapes and routes using the -following queries. - -```sql -DELETE FROM shapes WHERE shape_id NOT IN ( - SELECT DISTINCT shape_id FROM trips -); - -DELETE FROM routes WHERE route_id NOT IN ( - SELECT DISTINCT route_id FROM trips -); -``` - -There are other potential rows that can be removed (such as records in -`transfers.txt` that reference non-existent stops), but hopefully you -get the idea from the previous queries. - diff --git a/.idea/gtfs-book/ch-17-searching-for-trips.md b/.idea/gtfs-book/ch-17-searching-for-trips.md deleted file mode 100644 index dd0a901..0000000 --- a/.idea/gtfs-book/ch-17-searching-for-trips.md +++ /dev/null @@ -1,346 +0,0 @@ -## 17. Searching for Trips** - -This section shows you how to search for trips in a GTFS feed based on -specified times and stops. 
- -The three scenarios covered are: - -* Finding all trips departing from a given stop after a certain time -* Finding all trips arriving at a given stop before a certain time -* Finding all trips between two stops after a given time - -The first and second scenarios are the simplest, because they only rely -on a single end of each trip. The third scenario is more complex because -you have to ensure that each trip returned visits both the start and -finish stops. - -When searching for trips, the first thing you need to know is which -services are running for the given search time. - -### Finding Service IDs - -The first step in searching for trips is to determine which services are -running. To begin with, you need to find service IDs for a given date. -You then need to handle exceptions accordingly. That is, you need to add -service IDs and remove service IDs based on the rules in -`calendar_dates.txt`. - -***Note:** In *Scheduling Past Midnight*, you were shown how -GTFS works with times past midnight. The key takeaway from this is that -you have to search for trips for two sets of service IDs. This is -covered as this chapter progresses.* - -In Australia, 27 January 2014 (Monday) was the holiday for Australia -Day. This is used as an example to demonstrate how to retrieve service -IDs. - -Firstly, you need the main set of service IDs for the day. The following -SQL query achieves this. - -```sql -SELECT service_id FROM calendar - WHERE start_date <= '20140127' AND end_date >= '20140127' - AND monday = 1; - -# Result: 1, 2, 6, 9, 18, 871, 7501 -``` - -Next you need to find service IDs that are to be excluded, as achieved -by the following SQL query. - -```sql -SELECT service_id FROM calendar_dates - WHERE date = '20140127' AND exception_type = 2; - -# Result: 1, 2, 6, 9, 18, 871, 7501 -``` - -Finally, you need to find service IDs that are to be added. This query -is identical to the previous query, except for the different -`exception_type` value.
- -```sql -SELECT service_id FROM calendar_dates - WHERE date = '20140127' AND exception_type = 1; - -# Result: 12, 874, 4304, 7003 -``` - -You can combine these three queries all into a single query in SQLite -using `EXCEPT` and `UNION`, as shown in the following SQL query. - -```sql -SELECT service_id FROM calendar - WHERE start_date <= '20140127' AND end_date >= '20140127' - AND monday = 1 - -UNION - -SELECT service_id FROM calendar_dates - WHERE date = '20140127' AND exception_type = 1 - -EXCEPT - -SELECT service_id FROM calendar_dates - WHERE date = '20140127' AND exception_type = 2; - -# Result: 12, 874, 4304, 7003 -``` - -Now when you search for trips, only trips that have a matching -`service_id` value are included. - -### Finding Trips Departing a Given Stop - -In order to determine the list of services above, a base timestamp on -which to search is needed. For the purposes of this example, assume that -timestamp is 27 January 2014 at 1 PM (`13:00:00` when using GTFS). - -This example searches for all services departing from Adelaide Railway -Station, which has stop ID `6665` in the Adelaide Metro GTFS feed. To -find all matching stop times, the following query can be performed. - -```sql -SELECT * FROM stop_times - WHERE stop_id = '6665' - AND departure_time >= '13:00:00' - AND pickup_type = 0 - ORDER BY departure_time; -``` - -This returns a series of stop times that match the given criteria. The -only problem is it does not yet take into account valid service IDs. - -***Note:** This query may also return stop times that are the final stop -on a trip, which is not useful for somebody trying to find departures.
-You may want to modify your database importer to override the final stop -time of each trip so its `pickup_type` has a value of `1` (no pick-up) and -its first stop time so it has a `drop_off_type` of `1` (no drop-off).* - -To make sure only the correct trips are returned, join `stop_times` -with `trips` using `trip_id`, and then include the list of service -IDs. For the purposes of this example the service IDs, stop ID and -departure time are being hard-coded. You can either embed a sub-query, -or include the service IDs via code. - -```sql -SELECT t.*, st.* FROM stop_times st, trips t - WHERE st.stop_id = '6665' - AND st.trip_id = t.trip_id - AND t.service_id IN ('12', '874', '4304', '7003') - AND st.departure_time >= '13:00:00' - AND st.pickup_type = 0 - ORDER BY st.departure_time; -``` - -This gives you the final list of stop times matching the desired -criteria. You can then decide specifically which data you need to -retrieve; you now have the `trip_id`, meaning you can find all stop -times for a given trip if required. - -If you need to restrict the results to only those that occur after the -starting stop, you can retrieve stop times with only a `stop_sequence` -larger than that of the stop time returned in the above query. - -### Finding Trips Arriving at a Given Stop - -In order to find the trips arriving at a given stop before a specified -time, it is just a matter of making slight modifications to the above -query. Firstly, check the `arrival_time` instead of -`departure_time`. Also, check the `drop_off_type` value instead of -`pickup_type`. - -```sql -SELECT t.*, st.* FROM stop_times st, trips t - WHERE st.stop_id = '6665' - AND st.trip_id = t.trip_id - AND t.service_id IN ('12', '874', '4304', '7003') - AND st.arrival_time <= '13:00:00' - AND st.drop_off_type = 0 - ORDER BY st.arrival_time DESC; -``` - -For this particular data set, there are trips from four different routes -returned. 
If you want to restrict this to a particular route, you can -filter on the `t.route_id` value. - -```sql -SELECT t.*, st.* FROM stop_times st, trips t - WHERE st.stop_id = '6665' - AND st.trip_id = t.trip_id - AND t.service_id IN ('12', '874', '4304', '7003') - AND t.route_id = 'BEL' - AND st.arrival_time <= '13:00:00' - AND st.drop_off_type = 0 - ORDER BY st.arrival_time DESC; -``` - -### Performance of Searching Text-Based Times - -These examples search using text-based arrival/departure times (such as -`13:00:00`). This works because the GTFS specification mandates that -all times are `HH:MM:SS` format (although `H:MM:SS` is allowed for times -earlier than 10 AM). - -Doing this kind of comparison (especially if you are scanning millions -of rows) is quite slow and expensive. It is more efficient to convert -all times stored in the database to integers that represent the number -of seconds since midnight. - -***Note:** The GTFS specification states that arrival and departure times -are "noon minus 12 hours" in order to account for daylight savings time. -This is effectively midnight, except for the days that daylight savings -starts or finishes.* - -In order to achieve this, you can convert the text-based time to an -integer with `H * 3600 + M * 60 + S`. For example, `13:35:21` can -be converted using the following steps. - -``` - (13 * 3600) + (35 * 60) + (21) -= 46800 + 2100 + 21 -= 48921 -``` - -You can then convert back to hours, minutes and seconds in order to -generate timestamps in your application as shown in the following -algorithm. - -``` -H = floor( 48921 / 3600 ) - = floor( 13.59 ) - = 13 - -M = floor( 48921 / 60 ) % 60 - = floor( 815.35 ) % 60 - = 815 % 60 - = 35 - -S = 48921 % 60 - = 21 -``` - -### Finding Trips Between Two Stops - -Now that you know how to look up a trip from or to a given stop, the -previous query can be expanded so both the start and finish stop are -specified. The following example finds trips that depart after 1 PM. 
The -search only returns trips departing from *Adelaide Railway Station* -(stop ID `6665`). Additionally, only trips that then visit -*Blackwood Railway Station* (stop IDs `6670` and `101484`) are -included. - -In order to achieve this, the following changes must be made to the -previous examples. - -* **Join against stop_times twice.** Once for the departure stop time - and once for the arrival stop time. -* **Allow for multiple stop IDs at one end.** The destination in this - example has two platforms, so you need to check both of them. -* **Ensure the departure time is earlier than the arrival time.** - Otherwise trips heading in the opposite direction may also be - returned. - -The following query demonstrates how this is achieved. - -```sql -SELECT t.*, st1.*, st2.* - FROM trips t, stop_times st1, stop_times st2 - WHERE st1.trip_id = t.trip_id - AND st2.trip_id = t.trip_id - AND st1.stop_id = '6665' - AND st2.stop_id IN ('6670', '101484') - AND t.service_id IN ('12', '874', '4304', '7003') - AND st1.departure_time >= '13:00:00' - AND st1.pickup_type = 0 - AND st2.drop_off_type = 0 - AND st1.departure_time < st2.arrival_time - ORDER BY st1.departure_time; -``` - -In this example, the table alias `st1` is used for the departure stop -time. Once again, the stop ID must match, as well as the departure time -and pick-up type. - -For the arrival stop time the alias `st2` is used. This table also -joins the `trips` table using `trip_id`. Since the destination has -multiple stop IDs, the SQL `IN` construct is used. The arrival time is -not important in this example, so only the departure is checked. - -The final thing to check is that the departure occurs before the -arrival. If you do not perform this step, then trips traveling in the -opposite direction may also be returned. - -***Note:** Technically, there may be multiple results returned for the -same trip. For some transit agencies, a single trip may visit a stop -more than once.
If this is the case, you should also check the trip -duration (arrival time minus departure time) and use the shortest trip -when the same trip is returned multiple times.* - -### Accounting for Midnight - -As discussed previously, GTFS has the ability for the trips to depart or -arrive after midnight for a given service day without having to specify -it as part of the next service day. Consequently, while the queries -above are correct, they do not necessarily paint the full picture. - -In reality, when performing a trip search, you need to take into account -trips that have wrapped times (for instance, where 12:30 AM is specified -as `24:30:00`). If you want to find trips that depart after 12:30 AM -on a given day, you need to check for trips departing after -`00:30:00` on that day, as well as for trips departing after -`24:30:00` on the previous day. - -This means that for each trip search you are left with two sets of -trips, which you must then merge and present as appropriate. - -***Note:** In reality, agencies generally do not have trips that overlap -from multiple service days, so technically you often only need one query -(for example, a train service might end at 12:30 AM then restart on the -next service day at 4:30 AM). If your app / web site only uses a single -feed where you can tune your queries manually based on how the agency -operates, then you can get away with only querying a single service day. -On the other hand, if you are building a scalable system that works with -data from many agencies, then you need to check both days.* - -To demonstrate how this works in practice, the following example -searches for all trips that depart after 12:30:00 AM on 14 March 2014. -The examples earlier in this chapter showed how to find the service IDs -for a given date. To account for midnight, service IDs for both March 14 -(the "main" service date) and March 13 (the overlapping date) need to be -determined.
- -Assume that March 14 has a service ID of `C1` and March 13 has a -service ID of `C2`. First you need to find the departures for March -14, as shown in the following query. - -```sql -SELECT t.*, st.* FROM stop_times st, trips t - WHERE st.stop_id = 'S1' - AND st.trip_id = t.trip_id - AND t.service_id IN ('C1') - AND st.departure_time >= '00:30:00' - AND st.pickup_type = 0 - ORDER BY st.departure_time; -``` - -The resultant list needs to be combined with the trips that depart after -midnight from the March 13 service. To check this, it is just a matter -of swapping in the right service IDs, then adding 24 hours to the search -time. - -```sql -SELECT t.*, st.* FROM stop_times st, trips t - WHERE st.stop_id = 'S1' - AND st.trip_id = t.trip_id - AND t.service_id IN ('C2') - AND st.departure_time >= '24:30:00' - AND st.pickup_type = 0 - ORDER BY st.departure_time; -``` - -In order to get your final list of trips you must combine the results -from both of these queries. If you are generating complete timestamps in -order to present the options to your users, just remember to account for -the results from the second query being 24 hours later. - diff --git a/.idea/gtfs-book/ch-18-working-with-trip-blocks.md b/.idea/gtfs-book/ch-18-working-with-trip-blocks.md deleted file mode 100644 index d59b2c4..0000000 --- a/.idea/gtfs-book/ch-18-working-with-trip-blocks.md +++ /dev/null @@ -1,109 +0,0 @@ -## 18. Working With Trip Blocks - -One of the more complex aspects of GTFS is how to properly use the -`block_id` field in `trips.txt`. The concept is simple, but -incorporating this information in a manner that is simple to understand -for a passenger can be more difficult. - -A single vehicle (such as a bus or train) completes multiple trips in a -single service day. For instance, once a bus completes its trip (Trip X) -from Location A to Location B, it then begins another trip (Trip Y) from -Location B to Location C.
It then completes a final trip (Trip Z) from -Location C back to Location A. - -If a passenger boards in the middle of Trip X and is allowed to stay on -the bus until the end of Trip Y, then this should be represented in -GTFS by giving Trips X, Y, Z the same `block_id` value. - -The following diagram shows a vehicle that performs two trips, -completing opposite directions of the same route. Often a single vehicle -will do many more than just two trips in a day. - -![Block](images/block-01.png) - -***Note:** When trips in a block are represented in stop_times.txt, the -final stop of a trip and the first stop of the subsequent trip must both -be included, even though they are typically at the same stop (and often -the same arrival & departure time). The final stop of the first trip is -drop-off only, while the first stop of the next trip is pick-up only.* - -In the following diagram, the vehicle completes three trips, each for -separate routes. As in the previous diagram, the vehicle will likely -complete more trips over a single day. - -![Block](images/block-02.png) - -These two diagrams show the difficulties that can arise when trying to -calculate trips across multiple blocks. The first diagram is repeated -below, this time with stops marked so trips between two locations can be -found. - -![Block](images/block-03.png) - -Consider the scenario where you want to retrieve all trip times from -Stop S2 to Stop S1. Since the vehicle gets to Stop S3 then turns around -and heads back to S1, the trip search returns two options: - -1. Board at S2 on the upper trip, travel via S3, then get to S1. -2. Board at S2 on the lower trip, travel directly to S1. - -The second option is a subset of the first option, which means it is a -far shorter trip (plus it avoids the passenger getting annoyed from -passing their starting point again). To determine which option to use, -you can use the trip duration.
- -Now consider the other type of block formation, where a vehicle -completes subsequent trips from different routes. - -![Block](images/block-04.png) - -If a passenger wants to travel from Stop S2 to S4, you will not get the -situation where the vehicle travels past the same location twice. -However, it is also possible for a passenger to travel from Stop S2 to -Stop S6 without ever having to change vehicles. - -Referring back to the previous chapter about performing trip searches, -it is relatively straightforward to account for blocks in these -searches. Using the query to find trips between two stops as a -reference, the following changes need to be made: - -* Instead of joining both occurrences of `stop_times` to the - `trips` table, you now need two occurrences of `trips`. -* Each `stop_times` table occurrence is joined to a separate - `trips` table occurrence. -* The two occurrences of `trips` are joined on the `block_id` - value (if it is available). - -The following query shows how to find trips that start at stop `S2` -and finish at stop `S4`, departing after 1 PM on the day with service -ID `C1`. - -```sql -SELECT t1.*, t2.*, st1.*, st2.* - FROM trips t1, trips t2, stop_times st1, stop_times st2 - WHERE st1.trip_id = t1.trip_id - AND st2.trip_id = t2.trip_id - AND st1.stop_id = 'S2' - AND st2.stop_id = 'S4' - AND t1.service_id = 'C1' - AND ( - t1.trip_id = t2.trip_id - OR ( - LENGTH(t1.block_id) > 0 AND t1.block_id = t2.block_id - ) - ) - AND st1.departure_time >= '13:00:00' - AND st1.pickup_type = 0 - AND st2.drop_off_type = 0 - AND st1.departure_time < st2.arrival_time - ORDER BY st1.departure_time; -``` - -Since `block_id` may be unpopulated, the query joins on both the -`trip_id` and the `block_id`. - -***Note:** An alternative is to guarantee every single trip has a -`block_id` value when importing -- even if some blocks only consist of -a single trip. 
If you can guarantee this condition, then you can -simplify this query by using `t1.block_id = t2.block_id`.* - diff --git a/.idea/gtfs-book/ch-19-calculating-fares.md b/.idea/gtfs-book/ch-19-calculating-fares.md deleted file mode 100644 index df24dd4..0000000 --- a/.idea/gtfs-book/ch-19-calculating-fares.md +++ /dev/null @@ -1,296 +0,0 @@ -## 19. Calculating Fares - -In order to calculate a fare for a trip in GTFS, you must use data from -`fare_attributes.txt` and `fare_rules.txt`. It is relatively -straightforward to calculate the cost of a single trip (that is, -boarding a vehicle, traveling for a number of stops, then disembarking), -but it becomes much more complicated when you need to take into account -multiple trip segments (that is, one or more transfers). - -***Note:** As it stands, many feed providers do not include fare -information. This is because many systems have a unique set of rules -that cannot be modelled with the current structure of fares in GTFS. -Additionally, it is not possible to determine different classes of -pricing (such as having a separate price for adults and children). For -the purposes of this chapter, these limitations are ignored.* - -For more discussion on how fares work in GTFS, refer to *Fare -Definitions (`fare_attributes.txt` & `fare_rules.txt`)*. - -This chapter first shows you how to calculate fares for a single trip, -then how to account for transfers and for multiple trips. - -### Calculating a Single Trip Fare - -Much of the logic used when calculating fares requires knowledge of the -zones used in a trip. - -***Note:** A zone is a physical area within a transit system that -contains a series of stops. They are used to group trip pricing into key -areas in a system.
Some transit systems do not work like this (for -instance, they may measure the physical distance travelled rather than -specific stops), which is one reason why the GTFS fares model does not -work in all cases.* - -A zone is defined in GTFS by the `zone_id` column in `stops.txt`. A -single stop can only belong to one zone. - -Fares are matched to a trip using a combination of any of the following: - -* The route of the trip -* The zone of the stop the passenger boards from -* The zone of the stop the passenger disembarks at -* The zone(s) of any stops on the trip that are passed while the - passenger is on board. - -Consider the following simplified data set that may appear in -`stops.txt` and `stop_times.txt`. Assume for this example that the -trip `T1` belongs to a route with an ID of `R1`. - -``` -stop_id,zone_id -S1,Z1 -S2,Z1 -S3,Z2 -S4,Z3 - -trip_id,stop_id,stop_sequence -T1,S1,1 -T1,S2,2 -T1,S3,3 -T1,S4,4 -``` - -If a passenger travels from stop S1 to stop S4, then their starting zone -is Z1, their finishing zone is Z3, and the zones they pass through are -Z1, Z2 and Z3. - -***Note:** When calculating fares, the start and finish zones are also -included in the zones passed through, so in this example Z3 is also -considered as a zone that the trip passes through.* - -Using this data, you can now calculate matching fares. To do so, you -need to find all fares that match either of the following: - -* Fares that have no associated rules. -* Fares that have rules that match the specified trip. If a fare - defines multiple zones that must be passed through (using - `contains_id`), then all zones must be matched. - -If multiple fares qualify for a trip, then the cheapest fare is the one -to use. - -### Finding Fares With No Rules - -This is the simplest use-case for fares. You can find all matching fares -with the following SQL.
- -```sql -SELECT * FROM fare_attributes WHERE fare_id NOT IN ( - SELECT DISTINCT fare_id FROM fare_rules -); -``` - -If a feed only has `fare_attributes.txt` records with no rules, then -the difference between the fares is in the transfer rules. This section -only covers calculating fares for a single trip with no transfers, so -for now you can just select the cheapest fare using the following SQL. - -```sql -SELECT * FROM fare_attributes WHERE fare_id NOT IN ( - SELECT DISTINCT fare_id FROM fare_rules - ) - ORDER BY price + 0 LIMIT 1; -``` - -***Note:** You still need to check for fares with one or more rules in -order to find the cheapest price. Also, `0` is added in this query in -order to cast a string to a number. When you roll your own importer you -should instead import this as a numerical value.* - -### Finding Fares With Matched Rules - -Next you must check against specific rules for a fare. In order to do -this, you need the starting zone, finishing zone, and all zones passed -through (including the start and finish zones). - -Referring back to the previous example, if a trip starts at `Z1`, -passes through `Z2` and finishes at `Z3`, you can find fare -candidates (that is, fares that *may* match), using the following SQL -query. - -```sql -SELECT * FROM fare_attributes WHERE fare_id IN ( - SELECT fare_id FROM fare_rules - WHERE (LENGTH(route_id) = 0 OR route_id = 'R1') - AND (LENGTH(origin_id) = 0 OR origin_id = 'Z1') - AND (LENGTH(destination_id) = 0 OR destination_id = 'Z3') - AND (LENGTH(contains_id) = 0 OR contains_id IN ('Z1', 'Z2', 'Z3') - ) - ); -``` - -This returns a list of fares that may qualify for the given trip. As -some fares have multiple rules, all must be checked. The algorithm to -represent this is as follows.
- -``` -fares = [ result from above query ] - -qualifyingFares = [ ] - -for (fare in fares) { - if (qualifies(fare)) - qualifyingFares.add(fare) -} - -allFares = qualifyingFares + faresWithNoRules - -passengerFare = cheapest(allFares) -``` - -As shown on the final two lines, once you have the list of qualifying -fares, you can combine these with fares that have no rules (from the -previous section) and then determine the cheapest fare. - -First though, you must determine if a fare with rules qualifies for the -given trip. If a fare specifies zones that must be passed through, then -all rules must be matched. - -***Note:** If a particular rule specifies a different route, start, or -finish than the one you are checking, you do not need to ensure the -`contains_id` matches, since this rule no longer applies. You still need -to check the other rules for this fare.* - -The algorithm needs to build up a list of zone IDs from the fare rules -in order to check against the trip. Once this has been done, you need to -check that every zone ID collected from the rules is contained in the -trip's list of zones. - -``` -qualifies(fare, routeId, originId, destinationId, containsIds) { - - fareContains = [ ] - - for (rule in fare.rules) { - if (rule.contains.length == 0) - continue - - if (rule.route.length > 0 AND rule.route != routeId) - continue - - if (rule.origin.length > 0 AND rule.origin != originId) - continue - - if (rule.destination.length > 0 AND rule.destination != destinationId) - continue - - fareContains.add(rule.contains); - } - - if (fareContains.size == 0) - return YES - - if (containsIds HAS EVERY ELEMENT IN fareContains) - return YES - else - return NO -} -``` - -This algorithm achieves the following: - -* Only rules that have a value for `contains_id` are relevant. Rules - that do not have this value fall through and should be considered as - qualified.
-* If the route is specified but not equal to the one being checked, it - is safe to ignore the rule's `contains_id`. If the route is empty - or equal, the loop iteration can continue. -* Check for the `origin_id` and `destination_id` in the same - manner as `route_id`. -* If the route, origin and destination all qualify then store the - `contains_id` so it can be checked after the loop. - -The algorithm returns *yes* if the fare qualifies, meaning you can save -it as a qualifying fare. You can then return the cheapest qualifying -fare to the user. - -### Calculating Trips With Transfers - -Once you introduce transfers, fare calculation becomes more complicated. -A "trip with a transfer" is considered to be a trip where the passenger -boards a vehicle, disembarks, and then gets on another vehicle. For -example: - -* Travel on trip T1 from Stop S1 to Stop S2 -* Walk from Stop S2 to Stop S3 -* Travel on trip T2 from Stop S3 to Stop S4. - -In order to calculate the total fare for a trip with transfers, the -following algorithm is used: - -1. Retrieve list of qualifying fares for each trip individually -2. Create a list of every fare combination possible -3. Loop over all combinations and find the total cost -4. Return the lowest cost from Step 3. - -Step 1 was covered in *Calculating a Single Trip Fare*, but -you must skip the final step of finding the cheapest fare. This is -because the cheapest fare may change depending on subsequent transfers. -Instead, this step is performed once the cheapest *combination* is -determined. - -To demonstrate Step 2, consider the following example: - -* The trip on T1 from S1 to S2 yields the following qualifying fares: - F1, F2. -* The subsequent trip on T2 from S3 to S4 yields the following - qualifying fares: F3, F4. - -Generating every combination of these fares yields the following -possibilities: - -* F1 + F3 -* F1 + F4 -* F2 + F3 -* F2 + F4. 
- -Step 3 can now be performed, which involves finding the total cost for -each combination. As you need to take into account the possibility of -timed transfers (according to the data stored in -`fare_attributes.txt`), you also need to know about the times of these -trips. - -The following algorithm can be used to calculate the total cost using -transfer rules. In this example, you would call this function once for -each fare combination. - -``` -function totalCost(fares) { - total = 0 - - for (fare in fares) { - freeTransfer = NO - - if (previousFare ALLOWS TRANSFERS) { - if (HAS ENOUGH TRANSFERS REMAINING) { - if (TRANSFER NOT EXPIRED) { - freeTransfer = YES - } - } - } - - if (!freeTransfer) - total = total + fare.price; - - previousFare = fare; - } - - return total; -} -``` - -Once all combinations have called the `totalCost` algorithm, you will -have a price for each trip. You can then return the lowest price as the -final price for the trip. - diff --git a/.idea/gtfs-book/ch-20-trip-patterns.md b/.idea/gtfs-book/ch-20-trip-patterns.md deleted file mode 100644 index 4b0cf73..0000000 --- a/.idea/gtfs-book/ch-20-trip-patterns.md +++ /dev/null @@ -1,141 +0,0 @@ -## 20. Trip Patterns - -In a GTFS feed, a route typically has multiple trips that start and -finish at the same stops. If you are looking to reduce the size of the -data stored, then converting data from `stop_times.txt` into a series -of reusable patterns is an excellent way to do so. - -For two trips to share a common pattern, the following must hold true: - -* The stops visited and the order in which they are visited must be the same -* The time differences between each stop must be the same. - -The following table shows some fictional trips to demonstrate this.
- -| **Stop** | **Trip 1** | **Trip 2** | **Trip 3** | -| :------- | :--------- | :--------- | :--------- | -| S1 | 10:00:00 | 10:10:00 | 10:20:00 | -| S2 | 10:02:00 | 10:13:00 | 10:22:00 | -| S3 | 10:05:00 | 10:15:00 | 10:25:00 | -| S4 | 10:06:00 | 10:18:00 | 10:26:00 | -| S5 | 10:10:00 | 10:21:00 | 10:30:00 | - -In a GTFS feed, this would correspond to 15 records in -`stop_times.txt`. If you look more closely though, you can see the -trips are very similar. The following table shows the differences -between each stop time, instead of the actual time. - -| **Stop** | **Trip 1** | **Trip 2** | **Trip 3** | -| :------- | :-------------- | :-------------- | :-------------- | -| S1 | 00:00:00 | 00:00:00 | 00:00:00 | -| S2 | 00:02:00 (+2m) | 00:03:00 (+3m) | 00:02:00 (+2m) | -| S3 | 00:05:00 (+5m) | 00:05:00 (+5m) | 00:05:00 (+5m) | -| S4 | 00:06:00 (+6m) | 00:08:00 (+8m) | 00:06:00 (+6m) | -| S5 | 00:10:00 (+10m) | 00:11:00 (+11m) | 00:10:00 (+10m) | - -You can see from this table that the first and third trip, although they -start at different times, have the same offsets between stops (as well -as stopping at identical stops). - -Instead of using a table to store stop times, you can store patterns. By -storing the ID of the pattern with each trip, you can reduce the list of -stop times in this example from 15 to 10. As only time offsets are -stored for each pattern, the trip starting time also needs to be saved -with each trip. - -You could use SQL such as the following to model this. - -```sql -CREATE TABLE trips ( - trip_id TEXT, - pattern_id INTEGER, - start_time TEXT, - start_time_secs INTEGER -); - -CREATE TABLE patterns ( - pattern_id INTEGER, - stop_id TEXT, - time_offset INTEGER, - stop_sequence INTEGER -); -``` - -The data you would store for trips in this example is shown in the -following table.
- -| `trip_id` | `pattern_id` | `start_time` | `start_time_secs` | -| :-------- | :----------- | :----------- | :---------------- | -| T1 | 1 | 10:00:00 | 36000 | -| T2 | 2 | 10:10:00 | 36600 | -| T3 | 1 | 10:20:00 | 37200 | - -***Note:** The above table includes start_time_secs, which is an integer -value representing the number of seconds since the day started. Using -the hour, minutes and seconds in start_time, this value is `H * 3600 + M * 60 + S`.* - -In the `patterns` table, you would store data as in the following -table. - -| `pattern_id` | `stop_id` | `time_offset` | `stop_sequence` | -| :----------- | :-------- | :------------ | :-------------- | -| 1 | S1 | 0 | 1 | -| 1 | S2 | 120 | 2 | -| 1 | S3 | 300 | 3 | -| 1 | S4 | 360 | 4 | -| 1 | S5 | 600 | 5 | -| 2 | S1 | 0 | 1 | -| 2 | S2 | 180 | 2 | -| 2 | S3 | 300 | 3 | -| 2 | S4 | 480 | 4 | -| 2 | S5 | 660 | 5 | - -As you can see, this represents an easy way to significantly reduce the -amount of data stored. You could have tens or hundreds of trips each -sharing the same pattern. When you scale this to the entire feed, this -could reduce, say, 3 million records to about 200,000. - -***Note:** This is a somewhat simplified example, as there is other data -available in `stop_times.txt` (such as separate arrival/departure times, -drop-off type and pick-up type). You should take all of this data into -account when determining how to allocate patterns.* - -### Updating Trip Searches - -Changing your model to reuse patterns instead of storing every stop time -means your data lookup routines must also be changed. - -For example, to find all stop times for a given trip, you must now find -the pattern using the following SQL query.
- -```sql -SELECT * FROM patterns - WHERE pattern_id = (SELECT pattern_id FROM trips WHERE trip_id = 'YOUR_TRIP_ID') - ORDER BY stop_sequence; -``` - -If you want to determine the arrival/departure time, you must add the -offset stored for the pattern record to the starting time stored with -the trip. This involves joining the tables and adding `time_offset` to -`start_time_secs`, as shown in the following query. - -```sql -SELECT t.start_time_secs + p.time_offset, p.stop_id - FROM patterns p, trips t - WHERE p.pattern_id = t.pattern_id - AND t.trip_id = 'YOUR_TRIP_ID' - ORDER BY p.stop_sequence; -``` - -### Other Data Reduction Methods - -There are other ways you can reduce the amount of data, such as only -using patterns to store the stops (and not timing offsets), and then -storing the timings with each trip record. A technique such as this -further reduces the size of the database, but the trade-off is that -querying the data becomes slightly more complex. - -Hopefully you can see that by using the method described in this chapter -there are a number of ways to be creative with GTFS data, and that you -must make decisions when it comes to speed, size, and ease of querying -data. diff --git a/.idea/gtfs-book/ch-21-conclusion.md b/.idea/gtfs-book/ch-21-conclusion.md deleted file mode 100644 index 0bdc2e0..0000000 --- a/.idea/gtfs-book/ch-21-conclusion.md +++ /dev/null @@ -1,23 +0,0 @@ -## Conclusion - -Thanks for reading *The Definitive Guide to GTFS*. While there are many -techniques that may take some time to comprehend in this book, the -content should bring you up to speed with GTFS quickly. - -At first glance, GTFS appears to be very simple, but there are a number -of "gotchas" which are not immediately apparent until you have spent -significant time working with a range of feeds. 
- -The key takeaways from this book are: - -* How GTFS feeds are structured -* How to import a GTFS feed to SQL and perform queries -* How to search for trips, handle blocks and calculate fares -* How to optimize an SQL database to minimize resource usage on mobile - devices. - -If you have enjoyed this book, please share it or feel free to contribute improvements or changes as necessary. - -*Quentin Zervaas* -February, 2014 - diff --git a/.idea/gtfs-book/images/agency-google-maps.png b/.idea/gtfs-book/images/agency-google-maps.png deleted file mode 100644 index b9e7c2a..0000000 Binary files a/.idea/gtfs-book/images/agency-google-maps.png and /dev/null differ diff --git a/.idea/gtfs-book/images/block-01.png b/.idea/gtfs-book/images/block-01.png deleted file mode 100644 index 9408632..0000000 Binary files a/.idea/gtfs-book/images/block-01.png and /dev/null differ diff --git a/.idea/gtfs-book/images/block-02.png b/.idea/gtfs-book/images/block-02.png deleted file mode 100644 index 162e5ea..0000000 Binary files a/.idea/gtfs-book/images/block-02.png and /dev/null differ diff --git a/.idea/gtfs-book/images/block-03.png b/.idea/gtfs-book/images/block-03.png deleted file mode 100644 index 345cd72..0000000 Binary files a/.idea/gtfs-book/images/block-03.png and /dev/null differ diff --git a/.idea/gtfs-book/images/block-04.png b/.idea/gtfs-book/images/block-04.png deleted file mode 100644 index ec384b5..0000000 Binary files a/.idea/gtfs-book/images/block-04.png and /dev/null differ diff --git a/.idea/gtfs-book/images/orthogonal-distance.jpeg b/.idea/gtfs-book/images/orthogonal-distance.jpeg deleted file mode 100644 index 9c88323..0000000 Binary files a/.idea/gtfs-book/images/orthogonal-distance.jpeg and /dev/null differ diff --git a/.idea/gtfs-book/images/shape-original.jpeg b/.idea/gtfs-book/images/shape-original.jpeg deleted file mode 100644 index 5d72050..0000000 Binary files a/.idea/gtfs-book/images/shape-original.jpeg and /dev/null differ diff --git
a/.idea/gtfs-book/images/shape-reduced.jpeg b/.idea/gtfs-book/images/shape-reduced.jpeg deleted file mode 100644 index cf6f7cd..0000000 Binary files a/.idea/gtfs-book/images/shape-reduced.jpeg and /dev/null differ diff --git a/.idea/gtfs-book/images/stop-times.png b/.idea/gtfs-book/images/stop-times.png deleted file mode 100644 index cdce53d..0000000 Binary files a/.idea/gtfs-book/images/stop-times.png and /dev/null differ diff --git a/.idea/gtfs-book/images/stops-sample.png b/.idea/gtfs-book/images/stops-sample.png deleted file mode 100644 index ec0e5c7..0000000 Binary files a/.idea/gtfs-book/images/stops-sample.png and /dev/null differ diff --git a/.idea/gtfs-realtime-book/ch-00-definitive-guide-to-gtfs-realtime.md b/.idea/gtfs-realtime-book/ch-00-definitive-guide-to-gtfs-realtime.md deleted file mode 100644 index 0184448..0000000 --- a/.idea/gtfs-realtime-book/ch-00-definitive-guide-to-gtfs-realtime.md +++ /dev/null @@ -1,58 +0,0 @@ -# The Definitive Guide to GTFS-realtime - -*How to consume and produce real-time public transportation data with the GTFS-rt specification.* - -Originally written by Quentin Zervaas. - -## About This Book - -This book is a comprehensive guide to GTFS-realtime, a specification for -publishing of real-time public transportation data. GTFS-realtime is -designed to complement the scheduled data that hundreds of transit -agencies around the world publish using GTFS (General Transit Feed -Specification). - -This book begins with a description of the specification, with -discussion about the three types of data contained in GTFS-realtime -feeds: service alerts; vehicle positions; and trip updates. - -Next the reader is introduced to *Protocol Buffers*, the data format -that GTFS-realtime uses when it is being transmitted. This section then -instructs the reader how to consume the three types of data from -GTFS-realtime feeds (both from standard feeds and feeds with -*extensions*). 
- -Finally, the reader is shown how to produce a GTFS-realtime feed. A -number of examples in this book use Java, but the lessons can be applied -to a number of different languages. - -This book complements *The Definitive Guide to GTFS*. - -### About The Author - -Quentin was the founder of TransitFeeds (now OpenMobilityData), a web -site that provides a comprehensive listing of public transportation data -available around the world. This site is referenced various times -throughout this book. - -### Credits - -First Edition. Published in August 2015. - -**Technical Reviewer** - -Nick Maher - -**Copy Editors** - -Anne Zervaas -Miranda Little - -**Disclaimer** - -The information in this book is distributed on an "as is" basis, without -warranty. Although every precaution has been taken in the preparation of -this work, the author shall not be liable to any person or entity with -respect to any loss or damage caused or alleged to be caused directly or -indirectly by the information contained in this book. - diff --git a/.idea/gtfs-realtime-book/ch-01-introduction.md b/.idea/gtfs-realtime-book/ch-01-introduction.md deleted file mode 100644 index eff0ddf..0000000 --- a/.idea/gtfs-realtime-book/ch-01-introduction.md +++ /dev/null @@ -1,81 +0,0 @@ -## 1. Introduction to GTFS-realtime - -GTFS-realtime is a standard developed by Google in order to allow -transit agencies to provide real-time information about their service. - -There are three types of data a GTFS-realtime feed provides: - -1. Vehicle positions -2. Trip updates -3. Service alerts - -Vehicle positions contain data about events that have already occurred -(e.g. "the vehicle was at this location one minute ago"), whereas trip -updates contain data about events that are yet to occur (e.g. "the bus -will arrive in three minutes"). - -Typically, a single GTFS-realtime feed contains only one of these three -types of data.
Many agencies therefore have multiple GTFS-realtime feeds -(that is, one for vehicle positions, one for trip updates and one for -service alerts). - -![GTFS-realtime structure](images/GTFS-realtime-structure.png) - -The above diagram shows how a GTFS-realtime feed is designed to -complement a GTFS (General Transit Feed Specification) feed. It does -this in two ways: - -1. All identifiers for routes, trips and stops match those that appear - in the corresponding GTFS feed. -2. A GTFS feed shows the projected schedule for a given period (such as - the next six months), while the GTFS-realtime feed is used to make - last-minute adjustments based on real-world conditions (such as - traffic, roadworks, or weather). - -### Consuming GTFS-realtime Feeds - -The format of GTFS-realtime feeds is based on Protocol Buffers, a -language- and platform-neutral mechanism for serializing structured data. - -This is similar conceptually to JSON (JavaScript Object Notation), but -the data transferred across the wire is binary data and not -human-readable in its raw format. - -*Chapter 5. Protocol Buffers* shows you how to use Protocol -Buffers and the associated **gtfs-realtime.proto** file (used to -instruct Protocol Buffers how GTFS-realtime is structured). - -#### Consuming GTFS-realtime Feeds on Mobile Devices - -A common use-case for GTFS and GTFS-realtime feeds is to build -transit-related mobile apps that show scheduling data. However, it is -important to note that GTFS-realtime feeds are not intended to be -consumed directly by a mobile device. - -Many of the vehicle positions and trip update feeds provided by transit -agencies include a snapshot of their entire network at a single moment. -For a large network, this could be multiple megabytes of data being -updated every 10-15 seconds. - -A mobile app downloading a full GTFS-realtime feed every 10-15 seconds -would quickly download a large amount of data over their cellular -connection, which could be very expensive.
Additionally, their device -would need to process a large amount of data -- most of which would not -be relevant -- which would run down their battery unnecessarily. This -would also put a huge amount of strain on the provider's servers. - -Rather, GTFS-realtime is intended to be consumed by an intermediate -server. In the case of a mobile app, this intermediate server would -likely belong to the creator of the app. The mobile app can then query -this intermediate server for the relevant data it needs at that time. - -![Direct or Intermediate](images/GTFS-realtime-direct-or-intermediate.png) - -The above diagram demonstrates the different models. On the left, mobile -devices download entire GTFS-realtime feeds from the provider. Each -device is downloading 5 megabytes every 10-15 seconds. - -On the right, an intermediate server records all vehicle positions, then -mobile devices request only the data they need. This significantly -reduces the amount of data transferred. - diff --git a/.idea/gtfs-realtime-book/ch-02-service-alerts.md b/.idea/gtfs-realtime-book/ch-02-service-alerts.md deleted file mode 100644 index 6fb81c5..0000000 --- a/.idea/gtfs-realtime-book/ch-02-service-alerts.md +++ /dev/null @@ -1,363 +0,0 @@ -## 2. Introduction to Service Alerts - -Service alerts are generally used by transit agencies to convey -information that can not be conveyed using a trip update message (the -GTFS-realtime trip update message is covered in *Chapter 4. Introduction -to Trip Updates*). - -For example, consider a bus stop that was to be closed for a period of -time due to construction in the area. If the stop was to be closed for a -long period of time, the transit agency could modify the long-term -schedule (the GTFS feed). If the closure was unexpected and the stop -will reopen later that day, the agency can reflect this temporary -closure using trip updates in the GTFS-realtime feed (instructing each -relevant trip to skip that stop). 
- -However, in both of these cases the reason *why* there were no trips -visiting the stop has not been conveyed. Using a service alert, you can -explain why the stop is closed and when it will reopen. - -You can attach one or more entities to a service alert (such as routes, -trips or stops). In the above example of a stop being closed, you could -include its stop ID as it appears in the corresponding GTFS feed, -thereby allowing apps consuming the feed to display a message to their -users. - -### Examples of Service Alerts - -Some examples of common service alerts used by agencies include: - -* **Holiday schedules.** If there is an upcoming holiday, agencies may - use service alerts to remind travelers that a holiday schedule will - be applied that day. -* **Stop closing.** If a stop is closed (temporarily or permanently) - customers may be notified using a service alert. In this instance, - the alert can be linked to the stop that is closing. If it is being - replaced by a new stop, the new stop may also be linked. -* **Route detour.** Service alerts can be used to indicate that for a - period of time in the future a route will be redirected, perhaps due - to a road closure. In this instance, the alert would link to stops - that lie on the closed part of the road, as well as to routes that - will detour as a result of the closure. -* **Change to schedule.** If an upcoming change to a schedule results - in far fewer (or far more) services operating at a stop, service alerts - might be used to notify customers. -* **Vehicle broken down.** If a bus has broken down, or if electric - trains are not moving due to a power outage, passengers can be - notified using service alerts. In this instance, the alert could - link to a specific trip, or it could be more general and instead - link to its route or to the stops affected. - -The `EntitySelector` type described below shows how service -alerts can be linked to routes, trips and stops accordingly.
- -### Sample Feed - -The following extract is from the service alerts feed of TriMet in -Portland (). It contains a -single GTFS-realtime service alert *entity*. An entity contains either a -service alert, a vehicle position or a trip update. This service alert -indicates that a tree has fallen, causing potential delays to two -routes. - -**Note:** This extract has been converted from its binary format into a -human-readable version. *Outputting Human-Readable GTFS-realtime Feeds* -illustrates how this is achieved. - -``` -entity { - id: "35122" - - alert { - active_period { - start: 1415739180 - end: 1415786400 - } - - informed_entity { - route_id: "19" - route_type: 3 - } - - informed_entity { - route_id: "71" - route_type: 3 - } - - url { - translation { - text: "http://trimet.org/alerts/" - } - } - - description_text { - translation { - text: "Expect delays due to a tree down blocking northbound 52nd at Tolman. Police are flagging traffic thru using the southbound lane." - } - } - } -} -``` - -The elements of this service alert entity are as follows. - -* Active Period -* Informed Entity -* URL -* Description text - -Each of these fields are discussed below, as are some ways this -particular feed could be improved. - -### Active Period - -The active period element in this example states that the alert is -active between 12:53 PM on one day and 2:00 AM the following day -(Portland time). It is likely they included this finishing time to say -"this might last all day, but it definitely won't be a problem -tomorrow". - -Often precise timing isn't known. If the active period is omitted, then -the alert is assumed to be active for as long as it appears in the feed. - -### Informed Entity - -In a GTFS-realtime service alerts feed, *informed entity* refers to a -route, trip, stop, agency or route type, or any combination of these. - -In this example, there are two informed entities, both of which are bus -routes (as indicated by a route type of `3`). 
Referring to the TriMet -GTFS feed (), the routes -with an ID of `19` and `71` are as follows. - -```csv -route_id,route_short_name,route_long_name,route_type -19,19,Woodstock/Glisan,3 -71,71,60th Ave/122nd Ave,3 -``` - -Technically in this case the `route_type` value need not be specified, -as this can be derived using the GTFS feed. Sometimes, however, an alert -may impact *all* routes for a given mode of transport, so the route type -would be specified only. - -For instance, if an electrical outage affects all subway trains, then an -informed entity containing only a route type of `1` (the GTFS value -for Subway routes) would be sufficient, rather than including a service -alert for each subway route. - -### URL - -This field contains a web address where additional information about the -alert can be found. - -In this particular example, TriMet has included a generic URL for their -service alert. Other alerts in the same feed also use the same URL. It -would be far more useful if instead each alert pointed to a URL -specifically related to that alert. This would make it easier to provide -the end user with additional information specific to that alert. - -### Description Text - -This field contains a textual description of the alert that can be -presented to the users of your web site or app. Just like the URL field, -it has a type of `TranslatedString`, which is the mechanism by which -GTFS-realtime can provide translations in multiple languages if -required. - -### Improvements - -In addition to providing a more specific URL, this alert could be -improved by including values for the `cause` and `effect` fields. -For instance, `cause` could have a value of `WEATHER` or -`ACCIDENT`, while `effect` could have a value of `DETOUR` or -`SIGNIFICANT_DELAYS`. - -Additionally, this alert could include the `header_text` field to -complement the `description_text` field. 
This would allow you to -display a summary of the alert (`header_text`), then supply more -information if the user of your app or web site requests it -(`description_text`). - -### Specification - -This section contains the specification for the `Alert` entity type. -Some of this information has been sourced from the GTFS-realtime -reference page -(https://developers.google.com/transit/gtfs-realtime/reference). - -### Alert - -An `Alert` message makes it possible to provide extensive information -about a given service alert, including the ability to match it to any -number of routes, stops or trips. - -The fields for any single service alert are as described in the -following table. - -| Field | Type | Frequency | Description | -| :---- | :--- | :-------- | :---------- | -| `active_period` | `TimeRange` | Zero or more occurrences | The time or times when this alert should be displayed to the user. If no times are specified, then the alert should be considered active as long as it appears in the feed. Sometimes there may be multiple active periods specified. For example, if some construction was occurring daily between certain times, there might be an active period record for each day it will occur. | -| `informed_entity` | `EntitySelector` | Zero or more occurrences | These are the entities this service alert relates to (such as routes, trips or stops). | -| `cause` | `Cause` | Optional | This is the event that occurred to trigger the alert. Possible values for the `Cause` enumerator are listed below this table. | -| `effect` | `Effect` | Optional | This indicates what action was taken as a result of the incident. Possible values for the `Effect` enumerator are listed below this table. | -| `url` | `TranslatedString` | Optional | A URL which provides additional information that can be shown to users. | -| `header_text` | `TranslatedString` | Optional | A brief summary of the alert that can be used as a heading. This is to be in plain-text (no HTML markup). |
-| `description_text` | `TranslatedString` | Optional | A description of the alert that complements the header text. Similarly, it is to be in plain-text (no HTML markup). | - -The following values are valid for the `Cause` enumerator: - -* `ACCIDENT` -* `CONSTRUCTION` -* `DEMONSTRATION` -* `HOLIDAY` -* `MAINTENANCE` -* `MEDICAL_EMERGENCY` -* `POLICE_ACTIVITY` -* `STRIKE` -* `TECHNICAL_PROBLEM` -* `WEATHER` -* `UNKNOWN_CAUSE` -* `OTHER_CAUSE` - -The following values are valid for the `Effect` enumerator: - -* `ADDITIONAL_SERVICE` -* `NO_SERVICE` -* `SIGNIFICANT_DELAYS` -* `DETOUR` -* `STOP_MOVED` -* `MODIFIED_SERVICE` -* `REDUCED_SERVICE` -* `UNKNOWN_EFFECT` -* `OTHER_EFFECT` - -### TimeRange - -A `TimeRange` message specifies a time interval. It is not mandatory -to include both the start and finish times, but at least one of those is -required if a `TimeRange` is included. - -| Field | Type | Frequency | Description | -| :------ | :--- | :-------- | :---------- | -| `start` | `uint64` (64-bit unsigned integer) | Optional | The start time specified in number of seconds since 1-Jan-1970 00:00:00 UTC. | -| `end` | `uint64` (64-bit unsigned integer) | Optional | The end time specified in number of seconds since 1-Jan-1970 00:00:00 UTC. | - -If only the start time is specified, the time range is considered active -after the starting time. - -If only the end time is specified, the time range is considered active -before the end time. - -If both the start and finish times are specified, the time range is -considered active between these times. - -### EntitySelector - -An `EntitySelector` message is used to specify an entity from within a -GTFS feed. Doing so allows you to match up a service alert with a route -(or all routes of a given type), trip, stop or agency from the GTFS feed -that corresponds to the GTFS-realtime feed.
- -| Field | Type | Frequency | Description | -| :---- | :--- | :-------- | :---------- | -| `agency_id` | `string` | Optional | This is the ID of an agency as it appears in the `agency.txt` file of the corresponding GTFS feed. | -| `route_id` | `string` | Optional | This is the ID of a route as it appears in the `routes.txt` file of the corresponding GTFS feed. | -| `route_type` | `int32` (32-bit signed integer) | Optional | This is a GTFS route type, such as `3` for bus routes or `4` for ferry routes. Extended GTFS route types can also be used for this value. | -| `trip` | `TripDescriptor` | Optional | This is used to match a specific trip from the corresponding GTFS feed's `trips.txt` file. Trip matching can be potentially more complex than just matching the `trip_id`, which is why this field differs to the other ID-related fields in `EntitySelector`. You can find more discussion of this below in the `TripDescriptor` section. | -| `stop_id` | `string` | Optional | This is the ID of a stop as it appears in the `stops.txt` file of the corresponding GTFS feed. If the corresponding stop is of type "station" (a `location_type` value of `1`), then you may consider matching this entity to its child stops also. | - -All of these elements are optional, but at least one of them must occur. -If multiple elements are specified, then all must be matched. - -***Note:** Conversely, if you want multiple matches, then you should -instead include multiple EntitySelector values. For instance, if you -want a service alert that covers all buses and ferries, then the -informed_entity field would contain one EntitySelector for buses, and -another for ferries.* - -The following table shows some different combinations that can occur, -and what each of them mean. - -| Fields Specified | Meaning | -| :----------------------- | :--------- | -| `agency_id` | The alert applies to anything relating to the given agency. 
This may include any routes or trips that match back to the agency, or even stops that the trips stop at. | -| `route_id` | The alert applies to the given route. For instance, if a user is viewing upcoming departures for the matched route, then it would be appropriate to display the alert. | -| `route_type` | The alert is relevant when showing the user any data related to the given route type. For example, if a route type of `3` (buses) is specified, then it would be appropriate to display the alert when a user is viewing upcoming departures for any bus route in the corresponding GTFS feed. | -| `trip` | If a trip is matched, then it would be appropriate to display the alert when the user is viewing anything related to that trip. For instance, if you are showing a list of stop times for the trip then it would be relevant. If you have received a real-time vehicle position for the trip and are showing it to the user on a map, you might show the service alert if the user taps on the vehicle. | -| `stop_id` | If a stop is matched here then it would be appropriate to show an alert in a number of situations, such as when viewing upcoming departures for the stop, or if the user is taking a trip that embarks or disembarks at the matched stop. | -| `agency_id` + `route_id` | This kind of match is redundant, because there should only ever be a maximum of one route that matches a given `route_id` value in a GTFS feed. | -| `route_id` + `trip` | Similar to the previous case, any matched trip will only belong to a single route, so specifying the `route_id` has no real meaning. | -| `route_id` + `stop_id` | Matching both a route and a stop can be useful if an alert relating to a stop only applies to certain routes. For instance, if a stop is serviced by two different routes and you want to notify users that one of the routes will no longer stop here, the alert does not apply to the route that will continue to service the stop. 
| -| `trip` + `stop_id` | In this case, a service alert is matched to a combination of a trip and a stop. The alert would be relevant to a user waiting at a stop for a particular vehicle. It would not apply to other people at the same stop waiting for a different route. | -| `route_type` + `stop_id` | Sometimes a stop is shared by multiple travel modes. For instance, some light rail services share stops with buses. This combination can be useful if a stop-related alert only applies to one of those modes. | - -As this demonstrates, it is possible to match service alerts to -real-world entities in any number of ways. This allows you to keep -relevant users informed. The alternative to matching on this granular -level would be to show all of your users all service alerts, meaning -most alerts would be irrelevant to most people. - -### TripDescriptor - -One of the files in a GTFS feed is `frequencies.txt`, which is used to -specify trips that repeat every *x* minutes. This file is used when an -agency does not have a specific schedule for trips, other than -guaranteeing, for instance, that a new trip departs every five minutes. - -For example, it is possible for a particular route to run every five -minutes for an entire day, while only having one entry in `trips.txt` -(and one set of corresponding stop times in `stop_times.txt`). - -When using trip frequencies the `trip_id` value may not be enough to -uniquely identify a single trip from the GTFS feed. This means that in -order to match a trip, additional information may need to be supplied, -which the `TripDescriptor` message allows for. - -| Field | Type | Frequency | Description | -| :---- | :--- | :-------- | :---------- | -| `trip_id` | `string` | Optional | This is the ID of a trip as it appears in the `trips.txt` of the corresponding GTFS feed. Alternatively, this value may refer to a trip that has been added via a `TripUpdate` message and does not exist in the GTFS feed. 
| -| `route_id` | `string` | Optional | If this value is specified, it should match the route ID for the trip specified in `trip_id`. If the `route_id` is specified but no `trip_id` is specified, then this trip descriptor references all trips for the given route. | -| `direction_id` | `uint32` (32-bit unsigned integer) | Optional | This value corresponds to the `direction_id` value as specified in the `trips.txt` file of the corresponding GTFS feed. At time of writing this is an experimental field in the GTFS-realtime specification. | -| `start_time` | `string` | Optional | If the specified trip in `trip_id` is a frequency-expanded trip, this value must be specified in order to determine which instance of a trip this selector refers to. Its value is in the format `HH:MM:SS`, as in the `stop_times.txt` and `frequencies.txt` files. | -| `start_date` | `string` | Optional | It is possible that knowing the `trip_id` may not be enough to determine a specific trip. For instance, if a train is scheduled to depart at 11:30 PM but is running 40 minutes late, then you would need to know its date in order to match up with the original trip (40 minutes late), and not the next day's instance of the trip (23 hours 20 minutes early). This field helps to avoid this ambiguity. The date is specified in `YYYYMMDD` format. | -| `schedule_relationship` | `ScheduleRelationship` | Optional | This value indicates the relationship between the trip(s) specified in this selector and its regular schedule. | - -The following values are valid for the `ScheduleRelationship` -enumerator: - -* `SCHEDULED`. Used when the trip being described is running in - accordance with a trip in the GTFS feed. -* `ADDED`. A trip that was added in addition to the schedule. For - instance, if an extra trip was added because there were more - passengers than normal, it would be represented using this value. -* `UNSCHEDULED`. A trip that is running with no schedule associated - with it. 
For instance, if this trip is expected to run but there is - no static schedule associated with it, it would be marked with this - value. -* `CANCELED`. A trip that existed in the schedule but was removed. - For example, if a vehicle broke down and could not complete the - trip, then it would be marked as canceled. - -If a trip has been added, then the `route_id` should be populated, as -without this it may not be possible to determine which route the added -trip corresponds to (since the `trip_id` value would not appear in the -GTFS `trips.txt` file). - -With the newly-added `direction_id` field (still experimental at time -of writing this book), an added trip can also have its direction -specified, meaning you can present information to your users about which -direction the vehicle is traveling, even if you do not know its specific -stops. - -### TranslatedString - -A `TranslatedString` message contains one or more `Translation` -elements. This allows for alerts to be issued in multiple languages. A -`Translation` element is structured as follows. - -| Field | Type | Frequency | Description | -| :---- | :--- | :-------- | :---------- | -| `text` | `string` | Optional | A UTF-8 string containing the message. This string will typically be read by the users of your web site or app. | -| `language` | `string` | Optional | This is the language code for the given text (such as `en-US` for United States English). It can be omitted, but if there are multiple translations then at most only one translation can have this value omitted. | - diff --git a/.idea/gtfs-realtime-book/ch-03-vehicle-positions.md b/.idea/gtfs-realtime-book/ch-03-vehicle-positions.md deleted file mode 100644 index f247ae7..0000000 --- a/.idea/gtfs-realtime-book/ch-03-vehicle-positions.md +++ /dev/null @@ -1,317 +0,0 @@ -## 3. Introduction to Vehicle Positions - -A vehicle position message communicates the physical location of a bus, -train, ferry or otherwise. 
In addition to location of the vehicle, it -can also provide information about the vehicle's speed, bearing (the -direction it is facing), and how to match up the vehicle with a trip in -the static schedule. - -A recent addition to the GTFS-realtime specification (experimental at -time of writing -- see -) is the -ability to indicate how full a vehicle is. Although this element is not -yet formally a part of the specification, it has been included in this -book so it aligns with current documentation. - -### Sample Feed - -The following extract is from the vehicle position feed of MBTA in -Boston (). MBTA also provide separate -feeds for service alerts and trip updates. - -This extract contains a single GTFS-realtime entity, which represents a -single vehicle position. - -``` -entity { - id: "v1211" - - vehicle { - trip { - trip_id: "25906883" - start_date: "20150117" - schedule_relationship: SCHEDULED - route_id: "28" - } - - position { - latitude: 42.267967 - longitude: -71.093834 - } - - current_stop_sequence: 35 - timestamp: 1421565564 - stop_id: "1721" - - vehicle { - id: "y2189" - label: "2189" - } - } -} -``` - -***Note:** This extract has been converted from its binary format into a -human-readable version. *Outputting Human-Readable GTFS-realtime Feeds* -shows you how this is achieved.* - -Rendering the vehicle position and its path on a map along with the -referenced stop looks as follows. - -![Vehicle Position](images/vehicle-position.png) - -The elements of the vehicle position are described below. The outer -`vehicle` element in this entity is of type `VehiclePosition`. The -inner `vehicle` element is a `VehicleDescriptor`, which is described -shortly. - -### Trip - -If specified, this element is used to link the vehicle position to a -specific trip in the corresponding GTFS feed, or to a trip that has been -added to the schedule. Using MBTA's GTFS feed, you can determine that -the trip can be matched to the record below. 
You can find this in the -trips.txt file at -. - -| `route_id` | `service_id` | `trip_id` | `trip_headsign` | -| :--------- | :----------------------------- | :-------- | :-------------- | -| 28 | BUSS12015-hbs15no6-Saturday-02 | 25906883 | Mattapan Station via Dudley Station | - -**Note:** If the schedule_relationship value was ADDED or UNSCHEDULED, -there would not have been a corresponding record in trips.txt. - -If you then look up the trip's records in stop_times.txt, you can -determine the trip begins at 25:45:00. This means the trip begins at -1:45 AM on the morning following its service date. In this case, the -start date in the vehicle position is specified as January 17. This -means that this trip actually takes place on the morning of January 18. -If the start date was not included with the vehicle position, it may -have been difficult to determine the specific trip being referenced. - -### Position - -This element contains the geographic location of the vehicle. In this -instance, only the latitude and longitude are specified. It is also -possible to include the vehicle's bearing, odometer and speed, however -only the latitude and longitude are required. - -### Current Stop - -A vehicle position can include information about its position relative -to its current or next stop. - -The status of the stop is indicated by the `current_status` value. In -this example, the `current_status` is not specified, which means the -vehicle is currently in transit to the stop (in other words, it is not -stopped there, nor is it about to stop). - -The stop referred to in this instance has a `stop_id` of `1721`. -Referring once again to the MBTA GTFS feed -(), this stop is as -follows.
- -| `stop_id` | `stop_code` | `stop_name` | `stop_lat` | `stop_lon` | -| :-------- | :---------- | :------------------------ | :--------- | :--------- | -| 1721 | 1721 | Blue Hill Ave @ River St | 42.267151 | -71.09362 | - -The other value used to identify the upcoming or current stop is the -`current_stop_sequence` value. This refers to the `stop_sequence` -value in `stop_times.txt`. - -***Note:** Technically, you can infer the stop based on the trip and -current_stop_sequence value, so you do not strictly need the stop_id -value. However, in some cases it may not be possible to identify the -trip (and therefore not be able to infer the specific stop), so having -the `stop_id` value available in the vehicle position is useful.* - -By looking up the stop ID and trip ID in `stop_times.txt`, you can -locate the following entry: - -| `trip_id` | `stop_sequence` | `stop_id` | `arrival_time` | `departure_time` | -| :-------- | :-------------- | :-------- | :------------- | :--------------- | -| 25906883 | 35 | 1721 | 26:14:00 | 26:14:00 | - -***Note:** Remember that an hour value of 26 corresponds to 2 AM on the -following day (in this instance, the trip value specifies the trip's -date as January 17, so this stop time is 2 AM on January 18).* - -In plain English, this can be interpreted as "the vehicle is currently -in transit to stop 1721, scheduled to arrive at 2:14 AM." Note however, -that the timestamp value corresponds to 2:19 AM, meaning the bus is -about 5 minutes late. - -Tip: You can quickly find the human-readable version of a timestamp -using the command-line tool date. You may need to set the local timezone -first. In this instance, Boston's timezone can be set using export -TZ=America/New_York. You can then use date -r 1421565564 to find the -value of Sun 18 Jan 2015 02:19:24 EST. - -### Vehicle Descriptor - -The vehicle descriptor provides information to identify the specific -vehicle. In this example, the internal vehicle identifier is `y2189`. 
-It should remain consistent for this particular vehicle across the -system. Any subsequent vehicle positions or trip updates that refer to -this vehicle should use the same identifier. - -The vehicle ID value is not intended to be presented to end-users. -Instead, the label field should be used. The label could refer to a -particular train number, or perhaps a number painted on the side of a -bus. In the case of vehicle 2189, the number appears as in the following -photograph. - -![MBTA Bus](images/mbta_bus.png) - -The other piece of identifying information that can be presented to -users is the license plate of the vehicle. The MBTA's feed doesn't -specify this value, presumably because it duplicates the `label` -value. - -### Improvements - -Although this sample vehicle position contains the most pertinent -information (the coordinates of the vehicle and its corresponding trip), -knowing the direction that the vehicle is facing can also be useful. - -A common way of presenting vehicle positions on a map is to show all -positions for a given route on a map. If you can provide this extra -piece of information, a passenger can look at a map of all vehicle -positions for a given route and determine which are traveling in their -desired direction, and which are traveling in the opposite direction. - -***Note:** When you can match up a vehicle position to a specific trip, -it may be possible to filter which vehicles appear on the map using the -`direction_id` value for the trip. In some instances though, you may only -know the route of a vehicle and not its specific trip.* - -*Chapter 7. Consuming Vehicle Positions* shows you how to -determine the bearing of a vehicle if it is not included by the data -provider. - -### Specification - -This section contains the specification for the `VehiclePosition` -entity type. Some of this information has been sourced from the -GTFS-realtime reference page -(). 
- -### VehiclePosition - -A `VehiclePosition` element is used to specify the geographic position -and other attributes of a single vehicle, as well as providing -information to match that vehicle back to the corresponding GTFS feed. - -| Field | Type | Frequency | Description | -| :---- | :--- | :-------- | :---------- | -| `trip` | `TripDescriptor` | Optional | This is used to match the vehicle position to a specific trip from `trips.txt` in the corresponding GTFS feed. | -| `vehicle` | `VehicleDescriptor` | Optional | This element provides information that can be used to identify a particular vehicle. | -| `position` | `Position` | Optional | The vehicle's geographic location, bearing and speed are specified using the `Position` type. | -| `current_stop_sequence` | `uint32` (32-bit unsigned integer) | Optional | The sequence of the current stop, as it appears in the `stop_sequence` value for the trip matched in the corresponding `stop_times.txt` file. | -| `stop_id` | `string` | Optional | This is used to identify the current stop. If specified, the `stop_id` value must correspond to an entry in the `stops.txt` file of the corresponding GTFS feed. | -| `current_status` | `VehicleStopStatus` | Optional | If the current stop is specified (using `current_stop_sequence`), this value specifies what the "current stop" means. If this value isn't specified, it is assumed to be `IN_TRANSIT_TO`. | -| `timestamp` | `uint64` (64-bit unsigned integer) | Optional | This value refers to the moment at which the vehicle's position was measured, specified in number of seconds since 1-Jan-1970 00:00:00 UTC. | -| `congestion_level` | `CongestionLevel` | Optional | This value indicates the status of the traffic flow the vehicle is currently experiencing. The possible values for this element are listed below this table. | -| `occupancy_status` | `OccupancyStatus` | Optional | At time of writing, this field is experimental only. If specified, it indicates how full a given vehicle is.
| - -The possible values for the `VehicleStopStatus` enumerator are as -follows: - -| Value | Description | -| :---- | :---------- | -| `INCOMING_AT` | The vehicle is just about to arrive at the specified stop. In some vehicles, there is a visual display or audio announcement when approaching the next stop. This could correspond with `current_status` changing from `IN_TRANSIT_TO` to `INCOMING_AT`. | -| `STOPPED_AT` | The vehicle is currently stationary at the stop. Once it departs the `current_status` would update to `IN_TRANSIT_TO`. | -| `IN_TRANSIT_TO` | The vehicle has departed the previous stop and is on its way to the specified stop. This is the default value if `current_status` is not specified. | - -The possible values for the `CongestionLevel` enumerator are as -follows: - -| Value | Description | -| :---- | :---------- | -| `UNKNOWN_CONGESTION_LEVEL` | If the congestion level is not specified, then this is the default value. | -| `RUNNING_SMOOTHLY` | Traffic is flowing smoothly. | -| `STOP_AND_GO` | Traffic is flowing, but not smoothly. | -| `CONGESTION` | The vehicle is experiencing some level of congestion, and therefore likely to be moving very slowly. | -| `SEVERE_CONGESTION` | The vehicle is experiencing a high level of congestion, and therefore likely to be not moving. | - -While this information can be useful to present to the user, it does not -allow you to make any inference as to whether the vehicle will adhere to -its schedule. Schedules are often designed to account for levels of -congestion, depending on the time of day. - -For this value to be useful in telling a user why their vehicle may be -late, the GTFS `stop_times.txt` would likely also need a field to -indicate the expected congestion level for any given stop time. -Realistically though, this is where the `TripUpdate` element comes -into play. This is covered in *Chapter 4. Introduction to Trip Updates*. 
- -### TripDescriptor - -The meaning of the trip descriptor differs slightly for a vehicle -position than for a service alert. In a service alert, if the -`route_id` is specified but the `trip_id` is not, then the service -alert applies to all trips for that route. - -In the case of a vehicle position, if the `route_id` is specified but -not the `trip_id`, then it means the vehicle position corresponds to -"some" trip for that route, not "all" trips (it does not make sense for -it to apply to all trips). - -This means that if a user wants to know the vehicle positions for a -given route, you can show them all known positions, even if you are -unable to match the trip back to a trip in the corresponding GTFS feed. - -Refer to Chapter 4 for a description of all elements in a -`TripDescriptor`. - -### VehicleDescriptor - -This element is used to identify a specific vehicle, both internally and -for passengers. Every single vehicle in the system must have its own -identifier, and it should carry across all vehicle positions and trip -updates that correspond to the specific vehicle. - -| Field | Type | Frequency | Description | -| :---- | :--- | :-------- | :---------- | -| `id` | `string` | Optional | A unique identifier for a vehicle. This value is not intended to be shown to passengers, but rather for identifying the vehicle internally. | -| `label` | `string` | Optional | A label that identifies the vehicle to passengers. Unlike the `id` value, this value may be repeated for multiple vehicles, and it may change for a given vehicle over the course of a trip or series of trips. This might correspond to a route number that is displayed on a bus, or a particular train number, or some other identifier that passengers can see. | -| `license_plate` | `string` | Optional | The license plate of the vehicle. | - -### Position - -This element specifies the geographic position of a vehicle, as well as -related attributes such as bearing and speed. 
- -| Field | Type | Frequency | Description | -| :---- | :--- | :-------- | :---------- | -| `latitude` | `float` | Required | The latitude of the vehicle (a number in the range of `-90` to `90`). | -| `longitude` | `float` | Required | The longitude of the vehicle (a number in the range of `-180` to `180`). | -| `bearing` | `float` | Optional | Degrees, clockwise from True North. 0 is North, 90 is East, 180 is South, 270 is West. This can be either the direction the vehicle is facing, or the direction towards the next stop (GTFS-realtime does not provide a mechanism to determine which). | -| `odometer` | `double` | Optional | A measure of distance in meters. The GTFS-realtime specification does not state exactly what this value should represent. It could represent either the total number of meters the vehicle has ever travelled, or the number of meters travelled since the beginning of its current trip. | -| `speed` | `float` | Optional | The speed of the vehicle at the time of the reading, in meters per second. | - -While the latitude and longitude are the most important pieces of -information in this element, the vehicle's bearing can also be useful -to know. *Determining a Vehicle's Bearing* shows you how to -determine the bearing if it is not specified. - -### OccupancyStatus - -***Warning:** At time of writing the OccupancyStatus enumerator is -considered experimental only.* - -This enumerator is used for indicating how full a vehicle is. This can -be useful for warning passengers waiting for this vehicle that they may -not be able to fit and should instead attempt to use a different -vehicle. - -| Value | Description | -| :---- | :---------- | -| `EMPTY` | Used to indicate there are no (or very few) passengers on board. | -| `MANY_SEATS_AVAILABLE` | The vehicle is not empty, but it has many seats available. | -| `FEW_SEATS_AVAILABLE` | The vehicle has some seats available and is still accepting passengers. 
| -| `STANDING_ROOM_AVAILABLE` | The vehicle is still accepting passengers, but they will have to stand. | -| `CRUSHED_STANDING_ROOM_ONLY` | The vehicle is still accepting passengers, but they will have to stand and there is very limited space. | -| `FULL` | The vehicle is considered full but may still be accepting new passengers | -| `NOT_ACCEPTING_PASSENGERS` | The vehicle is not accepting new passengers. | - diff --git a/.idea/gtfs-realtime-book/ch-04-trip-updates.md b/.idea/gtfs-realtime-book/ch-04-trip-updates.md deleted file mode 100644 index 3269fb4..0000000 --- a/.idea/gtfs-realtime-book/ch-04-trip-updates.md +++ /dev/null @@ -1,217 +0,0 @@ -## 4. Introduction to Trip Updates - -A trip update message is used to report the progress of a vehicle along -its trip. Each trip may only have one trip update message in a -GTFS-realtime feed. - -A trip update can report that a trip has been canceled, or it can update -the progress of any number of stops on the trip. For example, a trip -update may contain an arrival estimate only for the vehicle's next -stop, or it may contain estimates for every remaining stop on the trip. - -If a trip does not have a trip update message, this should be -interpreted as there being no real-time information available; not that -it is necessarily progressing as scheduled. - -### Sample Feed - -The following extract is from the MBTA trip update feed -(). MBTA also provide separate feeds -for service alerts and vehicle positions. - -This extract contains a single GTFS-realtime entity, which represents a -bus that is four minutes behind schedule (a `delay` value of `240` -seconds). 
- -``` -entity { - id: "25732950" - - trip_update { - trip { - trip_id: "25732950" - start_date: "20150120" - schedule_relationship: SCHEDULED - route_id: "08" - } - - stop_time_update { - stop_sequence: 43 - arrival { - delay: 240 - } - stop_id: "135" - } - - vehicle { - id: "y2189" - label: "2189" - } - } -} -``` - -***Note:** This extract has been converted from its binary format into a -human-readable version. *Outputting Human-Readable GTFS-realtime Feeds* -shows you how this is achieved.* - -The elements of a `trip_update` entity are as follows. - -### Trip - -This element is used to identify the particular trip that a trip update -applies to. In this instance, the trip has an ID of `25732950`, -running on the service day of 20 January 2015. - -![Boston Trip](images/boston-trip.png) - -***Note:** Although this trip may no longer be active, you can view -similar trips at .* - -Since the `schedule_relationship` value is `SCHEDULED`, this trip -corresponds to a trip in the MBTA GTFS file -(). - -If the `schedule_relationship` value is `ADDED`, then this -corresponds to a new trip for the route with an ID of `08`. The -`stop_time_update` field would likely then contain an entry for each -stop on the added trip. - -***Note:** The trip could also be marked as CANCELED. If so, the trip -could either be in the GTFS feed or it may have been added through a -previous trip update message.* - -### Stop Time Update - -The `stop_time_update` element contains information specific to a -stop on the trip. It is repeated for each stop that there is information -for. If the trip has been canceled (indicated by a -`schedule_relationship` value of `CANCELED`) then there will be no -`stop_time_update` elements. - -In the above sample, there is a single update, corresponding to the stop -with an ID of `135`. You can look up the details of this stop at -. This stop has a -stop sequence of 43, as shown in the following figure.
- -![Map](images/stoptime_map.png) - -Referring to the `stop_times.txt` file in the GTFS feed, the scheduled -arrival time for this trip at stop 135 is 6:12 PM. The delay value -indicates that it will be four minutes late (240 seconds), meaning it -will now arrive at 6:16 PM. - -![Remaining Stops](images/remaining_stops.png) - -As there are no additional stop time updates for subsequent stops, it -can be assumed that this delay carries through to the rest of the trip. -There are eight remaining stops after this one, so all of those will -also be four minutes late. - -### Vehicle - -The vehicle information is useful as it enables you to identify specific -vehicles. In this instance, MBTA use the same identifier both as their -internal identifier and also as the identifier printed on the bus. This -sample once again refers to bus `2189`. The photograph in *Chapter 3: Vehicle Positions* -shows how this number appears on the vehicles. - -### Specification - -This section contains the specification for the `TripUpdate` -entity type. Some of this information has been sourced from the -GTFS-realtime reference page -(). - -### TripUpdate - -| Field | Type | Frequency | Description | -| :---- | :--- | :-------- | :---------- | -| `trip` | `TripDescriptor` | Optional | This element is used to match the referenced trip to `trips.txt` file from the corresponding GTFS feed. | -| `vehicle` | `VehicleDescriptor` | Optional | This element provides information that can be used to identify a particular vehicle. | -| `stop_time_update` | `StopTimeUpdate` | Repeated | This element contains one or more instances of `StopTimeUpdate`. Each occurrence represents a prediction for a single stop. They must be in order of their stop sequence. | -| `timestamp` | `uint64` (64-bit unsigned integer) | Optional | This value refers to the moment at which the real-time progress was measured, specified in number of seconds since 1-Jan-1970 00:00:00 UTC.
| -| `delay` | `int32` (32-bit signed integer) | Optional | This value is only experimental at time of writing. It is used to indicate the number of seconds the vehicle is either early (negative number) or late (positive number). Estimates specified within `StopTimeUpdate` elements take precedence over this value. | - -### TripDescriptor - -Identifying a trip in a trip update is slightly different to identifying -a trip in a service alert or vehicle position message. With vehicle -positions and service alerts, the trip descriptor may refer to an -arbitrary trip for a given route, but to do so with trip updates does -not make sense. - -With trip updates, you must be able to identify a specific trip from the -corresponding GTFS feed. This is because trip updates will often only -include an update for a single stop, and you must therefore determine -subsequent stop times for a given trip so those can be adjusted -accordingly. To do so, you need to be able to find a specific trip and -its corresponding stop times in the GTFS feed. - -This differs to a service alert where you can apply an alert to all -trips for a given route, rather than one at a specific time. It also -differs to vehicle positions, where being able to see all positions for -a route on a map is useful, even if you do not know the specific trip -each position corresponds to. - -### VehicleDescriptor - -This element is used to identify a specific vehicle, both internally and -for passengers. Every single vehicle in the system must have its own -identifier, and it should carry across all vehicle positions and trip -updates that correspond to the specific vehicle. - -| Field | Type | Frequency | Description | -| :---- | :--- | :-------- | :---------- | -| `id` | `string` | Optional | A unique identifier for a vehicle. This value is not intended to be shown to passengers, but rather for identifying the vehicle internally. 
| -| `label` | `string` | Optional | A label that identifies the vehicle to passengers. Unlike the `id` value, this value may be repeated for multiple vehicles, and it may change for a given vehicle over the course of a trip or series of trips. This might correspond to a route number that is displayed on a bus, or a particular train number, or some other identifier that passengers can see. | -| `license_plate` | `string` | Optional | The license plate of the vehicle. | - -### StopTimeUpdate - -| Field | Type | Frequency | Description | -| :---- | :--- | :-------- | :---------- | -| `stop_sequence` | `uint32` (32-bit unsigned integer) | Optional | In GTFS feeds, the order of stops in a trip is indicated by the `stop_sequence` value in `stop_times.txt`. If specified, the value specified in the `StopTimeUpdate` must match the value from the GTFS feed. It is possible for a single trip to make multiple visits to a single stop (for example, if it's a loop service), so this value is important. | -| `stop_id` | `string` | Optional | This value corresponds to a single stop from the associated GTFS feed. Using this value and the `stop_sequence` value, it is possible to pinpoint a specific record from `stop_times.txt` that this `StopTimeUpdate` element alters. | -| `arrival` | `StopTimeEvent` | Optional | Specifies the updated arrival time. If the `schedule_relationship` is `SCHEDULED`, then this field and/or `departure` must be specified. | -| `departure` | `StopTimeEvent` | Optional | Specifies the updated departure time. If the `schedule_relationship` is `SCHEDULED`, then this field and/or `arrival` must be specified. | -| `schedule_relationship` | `ScheduleRelationship` | Optional | If no value is specified, this defaults to `SCHEDULED`. Other possible values and their meanings are as described below. 
| - -Valid values for the `ScheduleRelationship` enumerator are: - -| Value | Description | -| :---- | :---------- | -| `SCHEDULED` | Indicates this stop occurs in accordance with the scheduled trip, although the arrival or departure times may be different from the times listed in the GTFS `stop_times.txt` file. | -| `SKIPPED` | Indicates that the corresponding stop will be skipped for the given trip. The arrival or departure times may still be included, but the vehicle will not be stopping. | -| `NO_DATA` | This is the value that should be used if no real-time information is available for this stop. In this case, neither `arrival` nor `departure` should be specified (if they are, you can safely ignore them). | - -### StopTimeEvent - -| Field | Type | Frequency | Description | -| :---- | :--- | :-------- | :---------- | -| `delay` | `int32` (32-bit signed integer) | Optional | The number of seconds that a vehicle is early (a negative value) or late (a positive value). A value of `0` indicates the vehicle is exactly on time. | -| `time` | `int64` (64-bit signed integer) | Optional | The time of the arrival or departure, specified in number of seconds since 1-Jan-1970 00:00:00 UTC. | -| `uncertainty` | `int32` (32-bit signed integer) | Optional | Represents the level of uncertainty attached to this prediction in seconds. A value of `0` means it is completely certain, while an omitted value means an unknown level of uncertainty. | - -Either the delay or exact time must be specified. If both are specified, -then the scheduled time in GTFS added to the delay should equal the -`time` value. If it does not, just the `time` value can be used. - -Conversely, if the `delay` value is not specified, you can calculate -it by subtracting the GTFS scheduled time from the predicted `time` -value. - -***Note:** Your interpretation of what constitutes a delay is likely to -depend on how you are presenting real-time data.
For instance, if you -present arrivals to your users as "Early", "On-Time" and "Late", it is -likely to be more useful to your users to indicate a 30-second delay as -being "On-Time" rather than "Late".* - -The `uncertainty` field is used to indicate the accuracy of the -prediction. For example, consider a prediction that indicates a bus will -be five minutes late. If the transit agency thinks the prediction is -within a minute on either side of five minutes (say, 4-6 minutes late), -then the uncertainty value is the difference between the minimum and -maximum value. In this example, the uncertainty is 2 minutes -- a value -of `120` seconds. - diff --git a/.idea/gtfs-realtime-book/ch-05-protocol-buffers.md b/.idea/gtfs-realtime-book/ch-05-protocol-buffers.md deleted file mode 100644 index 2059a63..0000000 --- a/.idea/gtfs-realtime-book/ch-05-protocol-buffers.md +++ /dev/null @@ -1,379 +0,0 @@ -## 5. Protocol Buffers - -The previous chapters have included extracts from GTFS-realtime feeds in -a human-readable format. This data is actually represented using a data -format called *Protocol Buffers*. - -Developed by Google and initially released in 2008, Protocol Buffers are -a way of serializing structured data into a format which is intended to -be smaller and faster than XML. - -Note: Remember, if you are writing a transit-related mobile app, -GTFS-realtime feeds are not intended to be consumed directly by mobile -devices due to the large amount of data transferred. Rather, you will -need an intermediate server to read the feed from the provider then -serve only relevant data to the mobile devices running your app. - -Even though it looks similar to JSON data, the human-readable version of -a protocol buffer is not intended to be manually parsed. Instead, data -is extracted from a protocol buffer using code written in your native language (such as Java, -C++ or Python).
- -Note: Although the Protocol Buffers application can generate code in -Java, C++ or Python, all code examples in this book will be in Java. - -For example, assume you have written a Java program that reads and -parses a GTFS-realtime service alerts feed (shown later in this chapter, -and in the next chapter). - -In order to consume a GTFS-realtime feed provided by a transit agency -such as TriMet or MBTA, your workflow would look similar to the -following diagram: - -![GTFS-realtime Consumption](images/GTFS-realtime-consumption.png) - -When a transit agency or data provider want to publish a GTFS-realtime -feed, their process would be similar, except instead of reading the feed -every 15 seconds, they would write a new protocol buffer data file every -15 seconds using data received from their vehicles. - -***Note:** *Chapter 11. Publishing GTFS-realtime Feeds* will -show you how to create a GTFS-realtime feed using Protocol Buffers. In -order to do so, you will need to install Protocol Buffers as -demonstrated in this chapter.* - -### Installing Protocol Buffers - -In order to generate code to read or write GTFS-realtime feeds in your -native language, you first need to install Protocol Buffers. Once -installed, it is capable of generating code for Java, C++ or Python. - -This section shows you how to download and build the `protoc` -command-line tool on a UNIX or Linux-based system. These instructions -were derived from installing Protocol Buffers on Mac OS X 10.10. - -First, download and extract the Protocol Buffers source code. At the -time of writing, the current version is 2.6.1. - -``` -$ curl -L \ - https://github.com/google/protobuf/releases/download/v2.6.1/protobuf-2.6.1.tar.gz \ - -o protobuf-2.6.1.tar.gz - -$ tar -zxf protobuf-2.6.1.tar.gz - -$ cd protobuf-2.6.1 -``` - -***Note:** Visit and -click the "Download" link to find the latest version. 
The following -instructions should still work for subsequent versions.* - -Next, compile the source files using `make`. First run the -`configure` script to build the `Makefile`, then run `make`. - -``` -$ ./configure && make -``` - -***Note:** Separating the commands by && means that make will only run if -`./configure` exits successfully.* - -Once compilation is complete, you can verify the build by running **make -check**. You can then install it globally on your system using **make -install**. If you do not want to install it globally, you can run -`protoc` directly from the **./src** directory instead. - -``` -$ make check - -$ make install -``` - -Next verify that it has been successfully built and installed by running -the `protoc` command. The output should be "Missing input file." - -``` -$ protoc - -Missing input file. -``` - -The next section will show you how to generate Java files using -`protoc` and the `gtfs-realtime.proto` file. - -### Introduction to gtfs-realtime.proto - -In order to generate source code files that can read a protocol buffer, -you need a `.proto` input file. Although you typically won't need to create or -modify `.proto` files yourself, it is useful to have a basic -understanding of how they work. - -A `.proto` file contains a series of instructions that defines the -structure of the data. In the case of GTFS-realtime, there is a file -called `gtfs-realtime.proto` which contains the structure for each of -the messages available (service alerts, vehicle positions and trip -updates). - -The following is an extract from `gtfs-realtime.proto` for the -`VehiclePosition` message. - -**Note:** The `TripDescriptor` and `VehicleDescriptor` types -referenced in this extract also have declarations, which are not -included here.
- -``` -message VehiclePosition { - optional TripDescriptor trip = 1; - optional Position position = 2; - optional uint32 current_stop_sequence = 3; - - enum VehicleStopStatus { - INCOMING_AT = 0; - STOPPED_AT = 1; - IN_TRANSIT_TO = 2; - } - - optional VehicleStopStatus current_status = 4 [default = IN_TRANSIT_TO]; - optional uint64 timestamp = 5; - - enum CongestionLevel { - UNKNOWN_CONGESTION_LEVEL = 0; - RUNNING_SMOOTHLY = 1; - STOP_AND_GO = 2; - CONGESTION = 3; - SEVERE_CONGESTION = 4; - } - - optional CongestionLevel congestion_level = 6; - optional string stop_id = 7; - optional VehicleDescriptor vehicle = 8; - extensions 1000 to 1999; -} -``` - -Ignoring the numerical values assigned to each field (they aren't -likely to be relevant because you never directly refer to them), you can -see how the structure is the same as the specification covered earlier -in this book for vehicle positions. - -Each field in a protocol buffer has a unique value assigned to it. This -value is used internally when encoding or decoding each field in a -GTFS-realtime feed. If there is additional data to be represented in a -feed, the values between 1000 and 1999 are reserved for extensions. -*Chapter 10. GTFS-realtime Extensions* shows how extensions in -GTFS-realtime work. - -### Compiling gtfs-realtime.proto - -The next step towards consuming a GTFS-realtime feed is to compile the -`gtfs-realtime.proto` file into Java code using `protoc`. In order -to do this, you must have already created a Java project ahead of time. -`protoc` will generate the files and incorporate them directly into -your project's source tree. - -These instructions assume you have created your Java project in -**/path/to/gtfsrt** and that you will download the -`gtfs-realtime.proto` file to **/path/to/protobuf**. - -First, download the `gtfs-realtime.proto` file.
- -``` -$ cd /path/to/protobuf - -$ curl \ - https://developers.google.com/transit/gtfs-realtime/gtfs-realtime.proto \ - -o gtfs-realtime.proto -``` - -***Note:** If this URL is no longer current when you read this, you can -find the updated location at -<https://developers.google.com/transit/gtfs-realtime/>.* - -In order to use `protoc`, you must specify the **--proto_path** -argument as the directory in which `gtfs-realtime.proto` resides. -Additionally, you must specify the full path to the -`gtfs-realtime.proto` file. - -Typically, in a Java project your source files will reside in a -directory called `src` within the project directory. This directory -must be specified in the **--java_out** argument. - -The full command to run is as follows: - -``` -$ protoc \ - --proto_path=/path/to/protobuf \ - --java_out=/path/to/gtfsrt/src \ - /path/to/protobuf/gtfs-realtime.proto -``` - -If this command runs successfully there will be no output to screen, but -there will be newly-created files in your source tree. There should now -be a **./com/google** directory in your source tree, and a package -called `com.google.transit.realtime`. - -### Adding the Protocol Buffers Library - -Before you can use this new package, you must add the Protocol Buffers -library to your Java project. You can either compile these files into a -Java archive (using the instructions in -**./protobuf-2.6.1/java/README.txt**), or you can add the Java -source files directly to your project as follows: - -``` -$ cd protobuf-2.6.1 - -$ cp -R ./java/src/main/java/com/google/protobuf \ - /path/to/gtfsrt/src/com/google/ -``` - -If you now try to build your project, an error will occur due to a -missing package called `DescriptorProtos`.
You can add this to your -project using the following command: - -``` -$ cd protobuf-2.6.1 - -$ protoc --java_out=/path/to/gtfsrt/src \ - --proto_path=./src \ - ./src/google/protobuf/descriptor.proto -``` - -Your project should now build successfully, meaning you can use the -`com.google.transit.realtime` package to read data from a -GTFS-realtime feed. - -### Reading Data From a GTFS-realtime Feed - -To read the data from a GTFS-realtime feed, you need to build a -`FeedMessage` object. The simplest way to do this is by opening an -`InputStream` for the URL of the GTFS-realtime feed. - -The following code builds a `FeedMessage` for the vehicle positions -feed of the MBTA in Boston. - -***Note:** To simplify the code listings in this book, package imports -are not included. All classes used are either standard Java classes, or -classes generated by Protocol Buffers.* - -```java -public class YourClass { - public void loadFeed() throws IOException { - - URL url = new URL("http://developer.mbta.com/lib/gtrtfs/Vehicles.pb"); - - InputStream is = url.openStream(); - - FeedMessage fm = FeedMessage.parseFrom(is); - - is.close(); - - // ... - } -} -``` - -A `FeedMessage` object contains zero or more entities, each of which -is either a service alert, a vehicle position, or a trip update. You can -retrieve a list of entities using **getEntityList()**, then loop over -them as follows: - -```java -public class YourClass { - public void loadFeed() throws IOException { - - // ... - - for (FeedEntity entity : fm.getEntityList()) { - // Process the entity here - } - - // ... - - } -} -``` - -Since many of the fields in GTFS-realtime are optional, you need to -check for the presence of the field you want to use before trying to -retrieve it. This is achieved using `hasFieldName()`. You can -then retrieve it using `getFieldName()`. - -In the case of `FeedEntity`, you need to check which of the -GTFS-realtime messages are available. 
For instance, to check if the -entity contains a service alert, you would call `entity.hasAlert()`. -If the call to `hasAlert()` returns `true`, you can retrieve it -using `entity.getAlert()`. - -The following code shows how to access the various entities. - -```java -public class YourClass { - public void loadFeed() throws IOException { - // ... - - for (FeedEntity entity : fm.getEntityList()) { - - if (entity.hasAlert()) { - Alert alert = entity.getAlert(); - - // Process the alert here - } - - if (entity.hasVehicle()) { - VehiclePosition vp = entity.getVehicle(); - - // Process the vehicle position here - } - - if (entity.hasTripUpdate()) { - TripUpdate tu = entity.getTripUpdate(); - - // Process the trip update here - } - } - - // ... - - } -} -``` - -The next three chapters will show you how to process the `Alert`, `VehiclePosition` and `TripUpdate` objects. - -### Outputting Human-Readable GTFS-realtime Feeds - -Earlier chapters included human-readable extracts from GTFS-realtime -feeds. Although plain-text GTFS-realtime feeds are not designed to be -parsed directly, they can be useful for quickly determining the kinds of -data available within a feed. - -All objects in a GTFS-realtime feed can be output using the -`TextFormat` class in the `protobuf` package. Passing the object to -**printToString()** will generate a human-readable version of the -GTFS-realtime element. - -For instance, you can output an entire feed as follows: - -```java -FeedMessage fm = ...; - -String output = TextFormat.printToString(fm); - -System.out.println(output); -``` - -Or you can output individual entities: - -```java -for (FeedEntity entity : fm.getEntityList()) { - String output = TextFormat.printToString(entity); - - System.out.println(output); -} -``` - -Alternatively, you can call the `toString()` method on these objects -to generate the same output. 
- diff --git a/.idea/gtfs-realtime-book/ch-06-consuming-service-alerts.md b/.idea/gtfs-realtime-book/ch-06-consuming-service-alerts.md deleted file mode 100644 index 9869173..0000000 --- a/.idea/gtfs-realtime-book/ch-06-consuming-service-alerts.md +++ /dev/null @@ -1,305 +0,0 @@ -## 6. Consuming Service Alerts - -The previous chapter introduced you to Protocol Buffers and showed you -how to load a remote GTFS-realtime feed into your Java project. This -chapter will show you how to read the data from each of the three entity -types (service alerts, vehicle positions and trip updates). - -The previous chapter also showed you how to loop over all entities in a -feed using `getEntityList()`. Each entity contains either a service -alert, a vehicle position or a trip update. - -Once you have verified that a `FeedEntity` element contains an alert, -you can retrieve the corresponding `Alert` object using -`getAlert()`. - -```java -for (FeedEntity entity : fm.getEntityList()) { - if (entity.hasAlert()) { - Alert alert = entity.getAlert(); - - processAlert(alert); - } -} -``` - -You can then access the specific properties of a service alert using the -returned object. - -### Cause & Effect - -For example, to retrieve the cause value for the alert, you would first -check for its presence with `hasCause()` then retrieve the value using -`getCause()`. - -```java -public void processAlert(Alert alert) { - if (alert.hasCause()) { - Cause cause = alert.getCause(); - - // ... - } - - // ... -} -``` - -The `Cause` object is an enumerator, meaning it has a finite number of -possible values. To determine which value the object corresponds to, -call `getNumber()` to compare it to the possible values. - -```java -switch (cause.getNumber()) { - case Cause.ACCIDENT_VALUE: - // ... - - case Cause.MEDICAL_EMERGENCY_VALUE: - // ...
-} -``` - -Note: There are other possible cause values to include in the switch -statement; these have been omitted here as they are all covered in the -specification earlier in this book. - -The `Effect` field works in the same way. The difference is that the -possible list of values to compare against is different. - -```java -if (alert.hasEffect()) { - Effect effect = alert.getEffect(); - - switch (effect.getNumber()) { - case Effect.DETOUR_VALUE: - // ... - - case Effect.SIGNIFICANT_DELAYS_VALUE: - // ... - } -} -``` - -### Title, Description and URL - -Each of these fields is of type `TranslatedString`. A -`TranslatedString` may contain multiple `Translation` objects, so -when processing these fields you must loop over the available -translations. - -For example, to loop over the available translations for the header text -you iterate over **getTranslationList()**. - -```java -if (alert.hasHeaderText()) { - TranslatedString header = alert.getHeaderText(); - - for (Translation translation : header.getTranslationList()) { - // Process the translation here - - } -} -``` - -***Note:** To access the description you would use `hasDescriptionText()` and -`getDescriptionText()`, while to access the URL you would use `hasUrl()` and -`getUrl()`.* - -Alternatively, you can use `getTranslationCount()` and -`getTranslation()` to retrieve each of the available translations. - -```java -for (int i = 0; i < header.getTranslationCount(); i++) { - Translation translation = header.getTranslation(i); - - // Process the translation here -} -``` - -A `Translation` object is made up of text and optionally, its -associated language. When dealing with the URL field, the text retrieved -from `getText()` contains a full URL.
- -```java -if (translation.hasLanguage()) { - String language = translation.getLanguage(); - - if (language.equals("fr")) { - // Do something for French language - } - else { - // All other languages - } -} - -if (translation.hasText()) { - String text = translation.getText(); - - // Do something with the text -} -``` - -***Note:** Most GTFS-realtime feeds only specify text in a single -language, and therefore do not include the language value.* - -### Active Period - -A service alert may contain zero or more time ranges, each of which -specify the dates and times the alert is active for. If none are -specified then the alert is active as long as it exists within the feed. - -You can access each of the `TimeRange` objects using either of the -following methods: - -```java -for (TimeRange timeRange : alert.getActivePeriodList()) { - // ... -} - -for (int i = 0; i < alert.getActivePeriodCount(); i++) { - TimeRange timeRange = alert.getActivePeriod(i); - - // ... -} -``` - -A `TimeRange` can have either a start or finish date, or it may -contain both. In Java, you can turn each of these dates into a -`java.util.Date` object as shown below. - -```java -if (timeRange.hasStart()) { - Date start = new Date(timeRange.getStart() * 1000); - - // ... -} - -if (timeRange.hasEnd()) { - Date end = new Date(timeRange.getEnd() * 1000); - - // ... -} -``` - -***Note:** The date value is multiplied by 1,000 because the date in the -GTFS-realtime is represented by the number of seconds since January 1, -1970, while `java.util.Date` is instantiated using the number of -milliseconds since the same date.* - -### Affected Entities - -A service alert may contain zero or more affected entities, each of -which describes a route, stop, agency, trip or route type. 
You can -access these entities using either of the following methods: - -``` -for (EntitySelector entity : alert.getInformedEntityList()) { - -} - -for (int i = 0; i < alert.getInformedEntityCount(); i++) { - EntitySelector entity = alert.getInformedEntity(i); - -} -``` - -There are a number of properties available in the `EntitySelector` -object, each of which can be used to match the entity to the -corresponding GTFS feed. - -For example, if the `EntitySelector` object has a route ID value, then -you should be able to locate the route in the corresponding GTFS feed's -`routes.txt` file. - -The properties can be accessed as follows: - -```java -if (entity.hasAgencyId()) { - String agencyId = entity.getAgencyId(); - -} - -if (entity.hasRouteId()) { - String routeId = entity.getRouteId(); - -} - -if (entity.hasRouteType()) { - int routeType = entity.getRouteType(); - -} - -if (entity.hasStopId()) { - String stopId = entity.getStopId(); - -} -``` - -The route type value is an Integer and if present must correspond either -to the standard GTFS route type values, or to the extended route type -values. - -The other entity information that can be contained in `EntitySelector` -is trip information. You can access the trip properties as follows: - -```java -if (entity.hasTrip()) { - TripDescriptor trip = entity.getTrip(); - - if (trip.hasTripId()) { - String tripId = trip.getTripId(); - - } - - if (trip.hasRouteId()) { - String routeId = trip.getRouteId(); - - } - - if (trip.hasStartDate()) { - String startDate = trip.getStartDate(); - - } - - if (trip.hasStartTime()) { - String startTime = trip.getStartTime(); - - } - - if (trip.hasScheduleRelationship()) { - ScheduleRelationship sr = trip.getScheduleRelationship(); - - } -} -``` - -You can test the `ScheduleRelationship` value by comparing the -`getNumber()` value to one of the available constants, as follows. - -```java -if (entity.hasTrip()) { - // ... 
- - if (trip.hasScheduleRelationship()) { - - ScheduleRelationship sr = trip.getScheduleRelationship(); - - switch (sr.getNumber()) { - case ScheduleRelationship.ADDED_VALUE: - // ... - break; - - case ScheduleRelationship.CANCELED_VALUE: - // ... - break; - - case ScheduleRelationship.SCHEDULED_VALUE: - // ... - break; - - case ScheduleRelationship.UNSCHEDULED_VALUE: - // ... - break; - } - } -} -``` diff --git a/.idea/gtfs-realtime-book/ch-07-consuming-vehicle-positions.md b/.idea/gtfs-realtime-book/ch-07-consuming-vehicle-positions.md deleted file mode 100644 index 1b745d0..0000000 --- a/.idea/gtfs-realtime-book/ch-07-consuming-vehicle-positions.md +++ /dev/null @@ -1,509 +0,0 @@ -## 7. Consuming Vehicle Positions - -Just like when consuming service alerts, you can loop over the -`FeedEntity` objects returned from `getEntityList()` to process -vehicle positions. If an entity contains a vehicle position, you can -retrieve it using the `getVehicle()` method. - -```java -for (FeedEntity entity : fm.getEntityList()) { - if (entity.hasVehicle()) { - VehiclePosition vp = entity.getVehicle(); - processVehiclePosition(vp); - } -} -``` - -You can then process the returned `VehiclePosition` object to extract -the details of the vehicle position. - -### Timestamp - -One of the provided values is a timestamp reading of when the vehicle -position reading was taken. - -```java -if (vp.hasTimestamp()) { - Date timestamp = new Date(vp.getTimestamp() * 1000); - -} -``` - -***Note:** The value is multiplied by 1,000 because the java.util.Date -class accepts milliseconds, whereas GTFS-realtime uses whole seconds.* - -This value is useful because the age of a reading can dictate how the -data is interpreted. For example, if your latest reading was only thirty -seconds earlier, your users would realize it is very recent and -therefore is probably quite accurate.
On the other hand, if the latest -reading was ten minutes earlier, they would see it had not updated -recently and may therefore not be completely accurate. - -The other way this value is useful is for determining whether to store -this new vehicle position. If your previous reading for the same vehicle -has the same timestamp, you can ignore this update, as nothing has -changed. - -### Geographic Location - -A `VehiclePosition` object contains a `Position` object, which -contains a vehicle's latitude and longitude, and may also include other -useful information such as its bearing and speed. - -```java -public static void processVehiclePosition(VehiclePosition vp) { - if (vp.hasPosition()) { - Position position = vp.getPosition(); - - if (position.hasLatitude() && position.hasLongitude()) { - float latitude = position.getLatitude(); - float longitude = position.getLongitude(); - - // ... - } - - // ... - } -} -``` - -Even though the `Position` element of a `VehiclePosition` is -required (according to the specification), checking for it explicitly -means you can handle its omission gracefully. Remember: when consuming -GTFS-realtime data you are likely to be relying on a third-party data -provider who may or may not follow the specification correctly. - -Likewise, the latitude and longitude are also required, but it is still -prudent to ensure they are included. These values are treated as any -floating-point numbers, so technically they may not be valid geographic -coordinates. - -A basic check to ensure the coordinates are valid is to ensure the -latitude is between -90 and 90 and the longitude is between -180 and -180. 
- -```java -float latitude = position.getLatitude(); -float longitude = position.getLongitude(); - -if (Math.abs(latitude) <= 90 && Math.abs(longitude) <= 180) { - // Valid coordinate - -} -else { - // Invalid coordinate - -} -``` - -A more advanced check would be to determine a bounding box of the data -provider's entire public transportation network from the corresponding -GTFS feed's `stops.txt`. You would then check that all received -coordinates are within or near the bounding box. - -***Note:** The important lesson to take from this is that a GTFS-realtime -feed may appear to adhere to the specification, but you should still -perform your own sanity checks on received data.* - -In addition to the latitude and longitude, you can also retrieve a -vehicle's speed, bearing and odometer reading. - -```java -if (position.hasBearing()) { - float bearing = position.getBearing(); - - // Degrees from 0-359. 0 is North, 90 is East, 180 is South, 270 is West - -} - -if (position.hasOdometer()) { - double odometer = position.getOdometer(); - - // Meters -} - -if (position.hasSpeed()) { - float speed = position.getSpeed(); - - // Meters per second -} -``` - -### Trip Information - -In order to associate a vehicle position with a particular trip from the -corresponding GTFS feed, vehicle positions may include a trip -descriptor. - -***Note:** The trip descriptor is declared as optional in the -GTFS-realtime specification. Realistically, it will be hard to provide -value to end-users without knowing which trip the position corresponds -to. At the very least, you would need to know the route (which can be -specified via the trip descriptor).* - -Just like with service alerts (covered in the previous chapter), there -are a number of values you can retrieve from a trip descriptor to -determine which trip a vehicle position belongs to. 
The following -listing demonstrates how to access this data: - -```java -if (vp.hasTrip()) { - TripDescriptor trip = vp.getTrip(); - - if (trip.hasTripId()) { - String tripId = trip.getTripId(); - - } - - if (trip.hasRouteId()) { - String routeId = trip.getRouteId(); - - } - - if (trip.hasStartDate()) { - String startDate = trip.getStartDate(); - - } - - if (trip.hasStartTime()) { - String startTime = trip.getStartTime(); - - } - - if (trip.hasScheduleRelationship()) { - ScheduleRelationship sr = trip.getScheduleRelationship(); - - } -} -``` - -### Vehicle Identifiers - -There are a number of values available in a vehicle position entity by -which to identify a vehicle. You can access an internal identifier (not -for public display), a label (such as a vehicle number painted on to a -vehicle), or a license plate, as shown in the following listing: - -```java -if (vp.hasVehicle()) { - VehicleDescriptor vehicle = vp.getVehicle(); - - if (vehicle.hasId()) { - String id = vehicle.getId(); - - } - - if (vehicle.hasLabel()) { - String label = vehicle.getLabel(); - - } - - if (vehicle.hasLicensePlate()) { - String licensePlate = vehicle.getLicensePlate(); - - } -} -``` - -The vehicle descriptor and the values contained within are all optional. -In the case where this information is not available, you can use the -trip descriptor provided with each vehicle position to match up vehicle -positions across multiple updates. - -Being able to match up the trip and/or vehicle reliably over subsequent -updates allows you to track the ongoing position changes for a -particular vehicle. For instance, if you wanted to animate the vehicle -moving on a map as new positions were received, you would need to know -that each update corresponds to a particular vehicle. - -### Current Stop - -Each vehicle position record can be associated with a single stop. If -specified, this stop must appear in the corresponding GTFS feed.
- -The stop can be identified either by the `stop_id` value, or by using -the `current_stop_sequence` value. If you use the stop sequence, the -stop can be determined by finding the corresponding record in the GTFS -feed's `stop_times.txt` file. - -```java -if (vp.hasStopId()) { - String stopId = vp.getStopId(); - -} - -if (vp.hasCurrentStopSequence()) { - int sequence = vp.getCurrentStopSequence(); - -} -``` - -***Note:** It is possible for the same stop to be visited multiple times -in a single trip (consider a loop service, although there are other -instances when this may also happen). The stop sequence value can be -useful to disambiguate this case.* - -On its own, knowing the stop has no meaning without context. The -`current_status` field provides this context, indicating that the -vehicle is either: - -* In transit to the stop (it is the next stop but the vehicle is not yet nearby) -* About to arrive at the stop -* Currently stopped at the stop. - -According to the GTFS-realtime specification, you can only make use of -`current_status` if the stop sequence is specified. - -The following code shows how you can retrieve and check the value of the -stop status. - -```java -if (vp.hasCurrentStopSequence()) { - int sequence = vp.getCurrentStopSequence(); - - if (vp.hasCurrentStatus()) { - VehicleStopStatus status = vp.getCurrentStatus(); - - switch (status.getNumber()) { - case VehicleStopStatus.IN_TRANSIT_TO_VALUE: - // ... - - case VehicleStopStatus.INCOMING_AT_VALUE: - // ... - - case VehicleStopStatus.STOPPED_AT_VALUE: - // ... - - } - } -} -``` - -### Congestion Levels - -The other two values that may be included with a vehicle position relate -to the congestion inside and outside of the vehicle. - -The `congestion_level` value indicates the flow of traffic. This value does -not indicate whether or not the vehicle is running to schedule, since congestion -levels are typically accounted for in scheduling. 
- -The following code shows how you can check the congestion level value: - -```java -if (vp.hasCongestionLevel()) { - CongestionLevel congestion = vp.getCongestionLevel(); - - switch (congestion.getNumber()) { - case CongestionLevel.UNKNOWN_CONGESTION_LEVEL_VALUE: - // ... - - case CongestionLevel.RUNNING_SMOOTHLY_VALUE: - // ... - - case CongestionLevel.STOP_AND_GO_VALUE: - // ... - - case CongestionLevel.SEVERE_CONGESTION_VALUE: - // ... - - case CongestionLevel.CONGESTION_VALUE: - // ... - - } -} -``` - -The `occupancy_status` value indicates how full the vehicle currently -is. This can be useful to present to your users so they know what to -expect before the vehicle arrives. For instance: - -* A person with a broken leg may not want to travel on a standing - room-only bus -* Someone traveling late at night might prefer a taxi over an empty - train for safety reasons -* If a bus is full and not accepting passengers, someone may stay at - home for longer until a bus with seats is coming by. - -You can check the occupancy status of a vehicle as follows: - -```java -if (vp.hasOccupancyStatus()) { - OccupancyStatus status = vp.getOccupancyStatus(); - - switch (status.getNumber()) { - case OccupancyStatus.EMPTY_VALUE: - // ... - - case OccupancyStatus.MANY_SEATS_AVAILABLE_VALUE: - // ... - - case OccupancyStatus.FEW_SEATS_AVAILABLE_VALUE: - // ... - - case OccupancyStatus.STANDING_ROOM_ONLY_VALUE: - // ... - - case OccupancyStatus.CRUSHED_STANDING_ROOM_ONLY_VALUE: - // ... - - case OccupancyStatus.FULL_VALUE: - // ... - - case OccupancyStatus.NOT_ACCEPTING_PASSENGERS_VALUE: - // ... - - } -} -``` - -### Determining a Vehicle's Bearing - -One of the values that can be specified in a GTFS-realtime vehicle -position update is the bearing of the vehicle being reported. This value -indicates either the direction the vehicle is facing, or the direction -towards the next stop. 
- -Ideally, the bearing contains the actual direction that the vehicle is -facing, not the direction to the next stop, since it is possible for -this value to be inaccurate. For example, if a Northbound vehicle is -stopped at a stop (that is, directly beside it), then the calculated -bearing would indicate the vehicle was facing East, not North. - -***Note:** This example assumes that the vehicle is in a country that -drives on the right-hand side of the road.* - -There are several ways to calculate a vehicle's bearing if it is not -specified in a GTFS-realtime feed: - -* Determine the direction towards the next stop -* Determine the direction using a previous vehicle position reading -* A combination of the above. - -***Note:** The GTFS-realtime specification states that feed providers -should not include the bearing if it is calculated using previous -positions. This is because consumers of the feed can calculate this, as -shown in the remainder of this chapter.* - -### Bearing to Next Stop - -In order to determine the bearing from the current location to the next -stop, there are two values you need: - -* **Vehicle position.** This is provided in the `latitude` and - `longitude` fields of the vehicle position. -* **Position of the next stop.** This is provided by the `stop_id` - or `current_stop_sequence` fields of the vehicle position. - -***Note:** Many GTFS-realtime vehicle position feeds do not include -information about the next stop, and consequently this technique will -not work in those instances. If this is the case, you can instead use the previous -reading to determine the bearing, as shown later in -*Bearing From Previous Position*.* - -The following formula is used to determine the bearing between the -starting location and the next stop: - -``` -θ = atan2( sin Δλ ⋅ cos φ2 , cos φ1 ⋅ sin φ2 − sin φ1 ⋅ cos φ2 ⋅ cos Δλ ) -``` - -In this equation, the starting point is indicated by *1*, while the next -stop is represented by *2*.
Latitude is represented by *φ*, while -longitude is represented by *λ*. For example, *φ2* means the latitude of -the next stop. - -The resultant value *θ* is the bearing between the two points in -*radians*, and must then be converted to degrees (0-360). - -This equation can be represented in Java as follows. It accepts the -latitude and longitude of two points in degrees, and returns the bearing -in degrees. - -```java -public static double calculateBearing(double lat1Deg, double lon1Deg, double lat2Deg, double lon2Deg) { - - // Convert all degrees to radians - double lat1 = Math.toRadians(lat1Deg); - double lon1 = Math.toRadians(lon1Deg); - double lat2 = Math.toRadians(lat2Deg); - double lon2 = Math.toRadians(lon2Deg); - - // sin Δλ ⋅ cos φ2 - double y = Math.sin(lon2 - lon1) * Math.cos(lat2); - - // cos φ1 ⋅ sin φ2 − sin φ1 ⋅ cos φ2 ⋅ cos Δλ - double x = Math.cos(lat1) * Math.sin(lat2) - Math.sin(lat1) * Math.cos(lat2) * Math.cos(lon2 - lon1); - - // Calculate the bearing in radians - double bearingRad = Math.atan2(y, x); - - // Convert radians to degrees - double bearingDeg = Math.toDegrees(bearingRad); - - // Ensure the bearing is positive, in the range of 0 <= bearing < 360 - if (bearingDeg < 0) { - bearingDeg += 360; - } - - return bearingDeg; -} -``` - -One thing to be aware of when using the next stop to determine bearing -is the `current_status` value of the vehicle position (if specified). -If the value is `STOPPED_AT`, then the calculated angle might be -significantly wrong, since the vehicle is likely directly next to the -stop. In this instance, you should either use the subsequent stop, or -determine the bearing from the previous position reading. - -### Bearing From Previous Position - -Similar to calculating a vehicle's bearing using the direction to the -next stop, you can also use a previous reading to calculate the -vehicle's direction.
- -The following diagram shows two readings for the same vehicle, as well -as the calculated bearing from the first point to the second. - -![Bearing From Previous Position](images/bearing-from-position.png) - -To calculate the bearing in this way, you need the following data: - -* **Current vehicle position.** This is provided in the `latitude` - and `longitude` fields of the vehicle position. -* **Previous vehicle position.** This should be the most recent - vehicle position recorded prior to receiving the current vehicle - position. Additionally, this position must represent a different - location to the current position. If a vehicle is stationary for - several minutes, you may receive several locations for the vehicle - with the same coordinates. - -***Note:** In the case of multiple readings at the same location, you -should use a minimum distance threshold to decide whether or not the -location is the same. For instance, you may decide that the two -locations must be more than, say, 10 meters apart to be used for comparison.* - -It is also necessary to check the age of the previous reading. If the -previous reading is more than a minute or two old, it is likely that the -vehicle has travelled far enough to render the calculated bearing -meaningless. - -### Combination - -These two strategies are useful to approximate a vehicle's bearing if -it is not specified in a vehicle position message, but they are still -reliant on certain data being available. - -The first technique needs to know the next stop, while the second needs -a previous vehicle position reading. - -Your algorithm to determine a vehicle's bearing could be as follows: - -* Use the provided bearing value in the vehicle position if this is - available. -* Otherwise, if you have a recent previous reading, calculate the - direction using that and the current reading. -* Otherwise, if the next stop is known, show the bearing of the - vehicle towards the stop.
- diff --git a/.idea/gtfs-realtime-book/ch-08-consuming-trip-updates.md b/.idea/gtfs-realtime-book/ch-08-consuming-trip-updates.md deleted file mode 100644 index 867e30f..0000000 --- a/.idea/gtfs-realtime-book/ch-08-consuming-trip-updates.md +++ /dev/null @@ -1,380 +0,0 @@ -## 8. Consuming Trip Updates - -Of the three message types in GTFS-realtime, trip updates are the most -complex. A single trip update can contain a large quantity of data and -is used to transform the underlying schedule. Trips can be modified in a -number of ways: trips can be canceled, stops can be skipped, and arrival -times can be updated. - -This chapter will show you how to consume trip updates and will discuss -real-world scenarios and how they can be represented using trip updates. - -Similar to service alerts and vehicle positions, you can loop over the -`FeedEntity` objects from `getEntityList()` to access and then -process `TripUpdate` objects. - -```java -for (FeedEntity entity : fm.getEntityList()) { - if (entity.hasTripUpdate()) { - TripUpdate tripUpdate = entity.getTripUpdate(); - - processTripUpdate(tripUpdate); - } -} -``` - -### Timestamp - -Just like with vehicle position updates, trip updates include a -timestamp value. This indicates when the real-time data was updated, -including any subsequent arrival/departure estimates contained within -the trip update. - -You can read this value into a native `java.util.Date` object as -follows: - -```java -if (tripUpdate.hasTimestamp()) { - Date timestamp = new Date(tripUpdate.getTimestamp() * 1000); - -} -``` - -**Note:** The value is multiplied by 1,000 because the java.util.Date -class accepts milliseconds, whereas GTFS-realtime uses whole seconds. - -Two of the ways you can use the timestamp value are: - -* When telling your users arrival estimates, you can also show the - timestamp so they know when the estimate was made. A newer estimate - (e.g. one minute old) is likely to be more reliable than an older - one (e.g. 
ten minutes old). -* You can also use the timestamp to decide whether or not to keep the - estimate. For instance, if for some reason a feed did not refresh in - a timely manner and all of the estimates were hours old, you could - simply skip over them as they would no longer provide meaningful - information. - -### Trip Information - -Each trip update contains necessary data so the included estimates can -be linked to a trip in the corresponding GTFS feed. - -There are three ways estimate data can be provided in a trip update: - -* The trip update contains estimates for all stops. -* The trip update contains estimates for some stops. -* The trip has been canceled, so there is no stop information. - -If the trip has been added (that is, it is not a part of the schedule in -the GTFS feed), then the trip descriptor has no use in resolving the -trip back to the GTFS feed. - -However, if a trip update only contains updates for some of the stops, -you must be able to find the entire trip in the GTFS feed so you can -propagate the arrival delay to subsequent stops. - -The following code shows how to extract values such as the GTFS trip ID -and route ID from the `TripDescriptor` object. - -```java -if (tripUpdate.hasTrip()) { - TripDescriptor trip = tripUpdate.getTrip(); - - if (trip.hasTripId()) { - String tripId = trip.getTripId(); - - // ... - } - - if (trip.hasRouteId()) { - String routeId = trip.getRouteId(); - - // ... - } - - if (trip.hasStartDate()) { - String startDate = trip.getStartDate(); - - // ... - } - - if (trip.hasStartTime()) { - String startTime = trip.getStartTime(); - - // ... - } - - if (trip.hasScheduleRelationship()) { - ScheduleRelationship sr = trip.getScheduleRelationship(); - - // ... - } -} -``` - -### Trip Delay - -Although only an experimental part of the GTFS-realtime specification at -the time of writing, a trip update can also contain a delay value. 
A -positive number indicates the number of seconds the vehicle is late, -while a negative value indicates the number of seconds early. A value of -`0` means the vehicle is on-time. - -This value can be used for any stop along the trip that does not -otherwise have an associated `StopTimeEvent` element. - -```java -if (tripUpdate.hasDelay()) { - int delay = tripUpdate.getDelay(); - - if (delay == 0) { - // on time - - } - else if (delay < 0) { - // early - - } - else if (delay > 0) { - // late - - } -} -``` - -### Vehicle Identifiers - -The `VehicleDescriptor` object contained in a trip update provides a -number of values by which to identify a vehicle. You can access an -internal identifier (not for public display), a label (such as a vehicle -number painted on to a vehicle), or a license plate. - -The following code shows how to access these values: - -```java -if (tripUpdate.hasVehicle()) { - VehicleDescriptor vehicle = tripUpdate.getVehicle(); - - if (vehicle.hasId()) { - String id = vehicle.getId(); - - } - - if (vehicle.hasLabel()) { - String label = vehicle.getLabel(); - - } - - if (vehicle.hasLicensePlate()) { - String licensePlate = vehicle.getLicensePlate(); - - } -} -``` - -The vehicle descriptor and the values contained within are all optional. -In the case where this information is not available, you can use the -trip descriptor information provided with each vehicle position to match -up vehicle positions across multiple updates. - -### Stop Time Updates - -Each trip update contains a number of stop time updates, each of which -is a `StopTimeUpdate` object. You can access each of the -`StopTimeUpdate` objects by calling **getStopTimeUpdateList()**. - -```java -for (StopTimeUpdate stopTimeUpdate : tripUpdate.getStopTimeUpdateList()) { - // ... 
-} -``` - -Alternatively, you can loop over each `StopTimeUpdate` object as -follows: - -```java -for (int i = 0; i < tripUpdate.getStopTimeUpdateCount(); i++) { - StopTimeUpdate stopTimeUpdate = tripUpdate.getStopTimeUpdate(i); - - // ... -} -``` - -Each `StopTimeUpdate` object contains a schedule relationship value -(using the `ScheduleRelationship` class, which is different to that -contained in `TripDescriptor` objects). - -The schedule relationship dictates how to use the rest of the data in -the stop time update, as well as which data will be present. If the -schedule relationship value is not present, the value is assumed to be -`SCHEDULED`. - -```java -ScheduleRelationship sr; - -if (stopTimeUpdate.hasScheduleRelationship()) { - sr = stopTimeUpdate.getScheduleRelationship(); - -} -else { - sr = ScheduleRelationship.SCHEDULED; - -} - -if (sr.getNumber() == ScheduleRelationship.SCHEDULED_VALUE) { - // An arrival and/or departure estimate is provided - -} -else if (sr.getNumber() == ScheduleRelationship.NO_DATA_VALUE) { - // No real-time data available in this update - -} -else if (sr.getNumber() == ScheduleRelationship.SKIPPED_VALUE) { - // The vehicle will not stop at this stop - -} -``` - -### Stop Information - -In order to determine which stop a stop time update corresponds to, -either the stop ID or stop sequence (or both) must be specified. You can -then look up the stop based on its entry in `stops.txt`, or determine -the stop based on the corresponding entry in `stop_times.txt`. - -```java -if (stopTimeUpdate.hasStopId()) { - String stopId = stopTimeUpdate.getStopId(); - -} - -if (stopTimeUpdate.hasStopSequence()) { - int sequence = stopTimeUpdate.getStopSequence(); - -} -``` - -### Arrival/Departure Estimates - -If the `ScheduleRelationship` value is `SCHEDULED` there must either -be an arrival or departure object specified. Both the arrival and -departure use the `StopTimeEvent` class, which can be accessed as -follows.
- -The following code shows how to access these values: - -```java -if (sr.getNumber() == ScheduleRelationship.SCHEDULED_VALUE) { - if (stopTimeUpdate.hasArrival()) { - StopTimeEvent arrival = stopTimeUpdate.getArrival(); - - // Process the arrival - } - - if (stopTimeUpdate.hasDeparture()) { - StopTimeEvent departure = stopTimeUpdate.getDeparture(); - - // Process the departure - } -} -``` - -Each `StopTimeEvent` object contains either an absolute timestamp for -the arrival or departure, or it contains a delay value. The delay value -is relative to the scheduled time in the corresponding GTFS feed. - -```java -if (stopTimeUpdate.hasArrival()) { - StopTimeEvent arrival = stopTimeUpdate.getArrival(); - - if (arrival.hasDelay()) { - int delay = arrival.getDelay(); - - // ... - } - - if (arrival.hasTime()) { - Date time = new Date(arrival.getTime() * 1000); - - // ... - } - - if (arrival.hasUncertainty()) { - int uncertainty = arrival.getUncertainty(); - - // ... - } -} -``` - -### Trip Update Scenarios - -This chapter has shown you how to handle trip update information as it -appears in a GTFS-realtime feed. It is also important to understand the -intent of the data provider when reading these updates. Consider the -following scenarios that can occur frequently in large cities: - -1. **A train needs to skip one or more stops.** Perhaps the station has - been closed temporarily due to unforeseen circumstances. -2. **A train that was scheduled to bypass a station will now stop at - it.** Perhaps there is a temporary delay on the line so the train - will wait at a stop while the track is cleared. -3. **A bus is rerouted down a different street.** Perhaps there was a - car accident earlier and police have redirected traffic. -4. **A train will stop at a different platform.** For example, instead - of a train stopping at platform 5 at a given station, it will now - stop at platform 6. This happens frequently on large train networks - such as in Sydney. -5. 
**A bus trip is completely canceled.** Perhaps the bus has broken - down and there is no replacement vehicle. -6. **An unplanned trip is added.** Perhaps there is an unexpectedly - large number of passengers so extra buses are brought in to clear - the backlog. - -While each provider may represent these scenarios differently in -GTFS-realtime, it is likely each of them would be represented as -follows. - -1. **Modify the existing trip.** Include a `StopTimeUpdate` for each - stop that is to be canceled with a `ScheduleRelationship` value of - `SKIPPED`. -2. **Cancel the trip and add a new one.** Unfortunately, there is no - way in GTFS-realtime to insert a stop into an existing trip. The - `ScheduleRelationship` value for the trip would be set to - `CANCELED` (in the `TripDescriptor`, not in the - `StopTimeUpdate` elements). -3. If a bus is rerouted down a different street, how it is handled - depends on which stops are missed: - - * If no stops are to be made on the new street, cancel the stops - impacted by the detour, similar to scenario 1. - * If the bus will stop on the new street to drop passengers off or - pick them up, then cancel the trip and add a new one, similar to - scenario 2. - -4. **Cancel the trip and add a new one.** Since it is not possible to - insert a stop using GTFS-realtime, the existing trip must be - canceled and a new trip added to replace it if you want the platform - to be reflected accurately. Note, however, that many data providers - do not differentiate between platforms in their feeds, so they only - refer to the parent station instead. In this instance a platform - change should be communicated using service alerts if it could - otherwise cause confusion. -5. **Cancel the trip.** In this instance you would not need to include - any `StopTimeUpdate` elements for the trip; rather, you would - specify `CANCELED` in the `ScheduleRelationship` field of - `TripDescriptor`. -6. 
**Add a new trip.** When adding a new trip, all of the stops in the - trip should also be included (not just the next one). The - `ScheduleRelationship` for the trip would be set to `ADDED`. - -The important takeaway from this section is that if you are telling your -users that a trip has been canceled, you need to make it clear to them -if a new trip has replaced it, otherwise they may not correctly -understand the intent of the data. - -In these instances, hopefully the data provider also provides a -corresponding service alert so the reason for the change can be -communicated to passengers. - diff --git a/.idea/gtfs-realtime-book/ch-09-database-storage.md b/.idea/gtfs-realtime-book/ch-09-database-storage.md deleted file mode 100644 index e9b42f2..0000000 --- a/.idea/gtfs-realtime-book/ch-09-database-storage.md +++ /dev/null @@ -1,492 +0,0 @@ -## 9. Storing Feed Data in a Database - -This chapter will demonstrate how to save data from a GTFS-realtime feed -into an SQLite database. In order to look up trips, stops, routes and -other data from the GTFS-realtime feed, this SQLite database will also -contain the data from the corresponding GTFS feed. - -To do this, the `GtfsToSql` and `GtfsRealTimeToSql` tools which have -been written for the purpose of this book and the preceding book, *The -Definitive Guide to GTFS* ([http://gtfsbook.com](http://gtfsbook.com/)), -will be used. - -GTFS and GTFS-realtime feeds from the MBTA in Boston -() will also be used. - -### Storing GTFS Data in an SQLite Database - -*The Definitive Guide to GTFS* demonstrated how to populate an SQLite -database using `GtfsToSql`. Here is an abbreviated version of the -steps required to do so. - -First, download `GtfsToSql` from -. This is a Java command-line -application to import a GTFS feed into an SQLite database. - -The pre-compiled `GtfsToSql` Java archive can be downloaded from its -GitHub repository at -. - -Next, download the MBTA GTFS feed, available from -. 
- -``` -$ curl http://www.mbta.com/uploadedfiles/MBTA_GTFS.zip -o gtfs.zip - -$ unzip gtfs.zip -d mbta/ -``` - -To create an SQLite database from this feed, the following command can -be used: - -``` -$ java -jar GtfsToSql.jar -s jdbc:sqlite:./db.sqlite -g ./mbta -o -``` - -***Note:** The -o flag enables the recording of additional useful data in -the database. For instance, each entry in the trips table will contain -the departure and arrival time (which would otherwise only be available -by looking up the `stop_times` table).* - -This may take a minute or two to complete (you will see progress as it -imports the feed and then creates indexes), and at the end you will have -a GTFS database in a file called `db.sqlite`. You can then query this -database with the command-line `sqlite3` tool, as shown in the -following example: - -``` -$ sqlite3 db.sqlite - -sqlite> SELECT agency_id, agency_name, agency_url, agency_lang FROM agency; - -2|Massport|http://www.massport.com|EN - -1|MBTA|[http://www.mbta.com](http://www.mbta.com/)|EN -``` - -***Note:** For more information about storing and querying GTFS data, -please refer to *The Definitive Guide to GTFS*, available from -[http://gtfsbook.com](http://gtfsbook.com/).* - -### Storing GTFS-realtime Data in an SQLite Database - -Once the GTFS data has been imported into the SQLite database, you can -then start importing GTFS-realtime data. For this you can use the -`GtfsRealTimeToSql` tool specifically written to import data into an -SQLite database. - -***Note:** Technically you do not need the GTFS data present in the -database as well, but having it makes it far simpler to resolve trip, -route and stop data.* - -The GTFS data will change infrequently (perhaps every few weeks or -months), while the realtime data can change several times per minute. -`GtfsRealTimeToSql` will frequently update a feed, according to the -refresh time specified. 
Each time it updates, the data saved on the -previous iteration is deleted, as that data is no longer the most -up-to-date data. - -To get started, download `GtfsRealTimeToSql` from its GitHub -repository at . Just -like `GtfsToSql`, this is a Java command-line application. The -pre-compiled `GtfsRealTimeToSql` Java archive can be downloaded from -. - -Since the `db.sqlite` database contains the MBTA GTFS feed, you can -now run `GtfsRealTimeToSql` with one of MBTA's GTFS-realtime feeds. -For instance, their vehicle positions feed is located at -. - -``` -java -jar GtfsRealTimeToSql.jar \ - -u "http://developer.mbta.com/lib/gtrtfs/Vehicles.pb" \ - -s jdbc:sqlite:./db.sqlite \ - -refresh 15 -``` - -When you run this command, the vehicle positions feed will be retrieved -every 15 seconds (specified by the `-refresh` parameter), and the data -will be saved into the `db.sqlite` SQLite database. - -MBTA also have a trip updates feed and a service alerts feed. In order -to load each of these feeds you will need to run `GtfsRealTimeToSql` -three separate times. To allow it to run in the background, add the -`-d` parameter (to make it run as a server daemon), and background it -using **&**. - -To load the vehicle positions in the background, stop the previous -command, then run the following command instead. - -``` -java -jar GtfsRealTimeToSql.jar \ - -u "http://developer.mbta.com/lib/gtrtfs/Vehicles.pb" \ - -s jdbc:sqlite:./db.sqlite \ - -refresh 15 \ - -d & -``` - -Now you can also load the trip updates into the same `db.sqlite` file. -MBTA's trip updates feed is located at -. The following -command reloads the trip updates every 30 seconds: - -``` -java -jar GtfsRealTimeToSql.jar \ - -u "http://developer.mbta.com/lib/gtrtfs/Passages.pb" \ - -s jdbc:sqlite:./db.sqlite \ - -refresh 30 \ - -d & -``` - -***Note:** You may prefer more frequent updates (such as 15 seconds), or -even less frequent (such as 60 seconds). 
The more frequently you update, -the greater your server utilization will be.* - -Finally, to load the service alerts feed, use the same command, but now -with the feed located at -. Generally, -service alerts are updated infrequently by providers, so you can use a -refresh time such as five or ten minutes (300 or 600 seconds). - -``` -java -jar GtfsRealTimeToSql.jar \ - -u "http://developer.mbta.com/lib/gtrtfs/Alerts/Alerts.pb" \ - -s jdbc:sqlite:./db.sqlite \ - -refresh 300 \ - -d & -``` - -These three feeds will continue to be reloaded until you terminate their -respective processes. - -### Querying Vehicle Positions - -When using `GtfsRealTimeToSql`, vehicle positions are stored in a -table called `gtfs_rt_vehicles`. If you want to retrieve positions -for, say, the route with an ID of 742, you can run the following query: - -``` -$ sqlite3 db.sqlite - -sqlite> SELECT route_id, trip_id, trip_date, trip_time, trip_sr, latitude, longitude - FROM gtfs_rt_vehicles - WHERE route_id = 742; -``` - -This query returns the trip descriptor and GPS coordinates for all -vehicles on the given route. The following table shows sample results -from this query. - -| `route_id` | `trip_id` | `trip_date` | `trip_time` | `trip_sr` | `latitude` | `longitude` | -| :--------- | :--------- | :----------- | :---------- | :-------- | :--------- | :---------- | -| 742 | 25900860 | 20150315 | | 0 | 42.347309 | -71.040359 | -| 742 | | | | | 42.331505 | -71.065590 | - -***Note:** The trip_sr column corresponds to the trip's schedule -relationship value.* - -This particular snapshot of vehicle position data shows two different -trips for route 742, which corresponds to the SL2 Silver Line. - -``` -sqlite> SELECT route_short_name, route_long_name FROM routes WHERE -route_id = 742; - -SL2|Silver Line SL2 -``` - -The first five columns retrieved are the trip identifier fields, which -are used to link a vehicle position with a trip from the GTFS feed. 
The -first row is simple: the `trip_id` value can be used to look up the -row from the `trips` table. - -``` -sqlite> SELECT service_id, trip_headsign, direction_id, block_id - -FROM trips - -WHERE trip_id = '25900860'; -``` - -The results from this query are as follows. - -| `service_id` | `trip_headsign` | `direction_id` | `block_id` | -| :--------------------------- | :-------------- | :------------- | :--------- | -| BUSS12015-hbs15017-Sunday-02 | South Station | 1 | S742-61 | - -***Note:** To see all fields available in this or any other table -(including `gtfs_rt_vehicles`), use the .schema command in SQLite. For -instance, enter the command `.schema gtfs_rt_vehicles`.* - -One helpful thing MBTA does is to include the trip starting date when -they include the trip ID. This helps to disambiguate trips that may -start or finish around midnight or later. - -The second vehicle position is another matter. This record does not -include any trip descriptor information other than the route ID. This -means you cannot reliably link this vehicle position with a specific -trip from the GTFS feed. - -However, knowing the route ID and the vehicle's coordinates may be -enough to present useful information to the user: "A bus on the Silver -Line SL2 route is at this location." You cannot show them if the vehicle -is running late, early or on-time, but you can show the user where the -vehicle is. - -### Querying Trip Updates - -The `GtfsRealTimeToSql` tool stores trip update data in two tables: -one for the main trip update data (such as the trip descriptor), and -another to store each individual stop time event that belongs within -each update. 
- -For example, to retrieve all trip updates for the route with ID 742 once -again, you could use the following query: - -```sql -SELECT route_id, trip_id, trip_date, trip_time, trip_sr, vehicle_id, vehicle_label - FROM gtfs_rt_trip_updates - WHERE route_id = '742'; -``` - -This query will return data similar to the following table: - -| `route_id` | `trip_id` | `trip_date` | `trip_time` | `trip_sr` | `vehicle_id` | `vehicle_label` | -| :--------- | :-------- | :---------- | :---------- | :-------- | :----------- | :-------------- | -| 742 | 25900860 | 20150315 | | 0 | y1106 | 1106 | -| 742 | 25900856 | 20150315 | | 0 | | | -| 742 | 25900858 | 20150315 | | 0 | | | - -Each of these returned records has corresponding records in the -`gtfs_rt_trip_updates_stoptimes` table that includes the -arrival/departure estimates for various stops on the trip. - -When using `GtfsRealTimeToSql`, an extra column called `update_id` -is included on both tables so they can be linked together. For example, -to retrieve all updates for the route ID 742, you can join the tables -together as follows: - -```sql -SELECT trip_id, arrival_time, arrival_delay, departure_time, departure_delay, stop_id, stop_sequence - FROM gtfs_rt_trip_updates_stoptimes - JOIN gtfs_rt_trip_updates USING (update_id) - WHERE route_id = '742'; -``` - -MBTA's trip updates feed only includes a stop time prediction for the -next stop. This means that each trip in the results only has one -corresponding record. You must then apply the delay to subsequent stop -times for the given trip. - -| `trip_id` | `arrival_time` | `arrival_delay` | `departure_time` | `departure_delay` | `stop_id` | `stop_sequence` | -| :-------- | :------------- | :-------------- | :--------------- | :---------------- | :-------- | :-------------- | -| 25900860 | | 30 | | | 74617 | 10 | -| 25900856 | | | | 0 | 74611 | 1 | -| 25900858 | | | | 0 | 31255 | 1 | - -There are several things to note in these results. 
Firstly, MBTA do not -provide a timestamp for `arrival_time` and `departure_time`; they -only provide the delay offsets that can be compared to the scheduled -time in the GTFS feed. - -Secondly, the second and third trips listed have not yet commenced. You -can deduce this just by looking at this table, since the next stop has -stop sequence of `1` (and therefore there are no stops before it). - -The first trip is delayed by 30 seconds. In other words, it will arrive -30 seconds later than it was scheduled. The data received from the -GTFS-realtime feed does not actually indicate the arrival timestamp, but -you can look up the corresponding stop time from the GTFS feed to -determine this. - -The following query demonstrates how to look up the arrival time: - -```sql -SELECT s.stop_name, st.arrival_time - FROM stop_times st, trips t, stops s - WHERE st.trip_index = t.trip_index - AND st.stop_index = s.stop_index - AND s.stop_id = '74617' - AND t.trip_id = '25900860' - AND st.stop_sequence = 10; -``` - -***Note:** The `GtfsToSql` tool used to import the GTFS feed adds fields -such as trip_index and stop_index in order to speed up data searching. -There is more discussion on the rationale of this in *The Definitive -Guide to GTFS*, available from -[http://gtfsbook.com](http://gtfsbook.com/).* - -This query joins the `stop_times` table to both the `trips` and -`stops` table in order to look up the corresponding arrival time. The -final three rows in the query contain the values returned above (the -`stop_id`, `trip_id` and the `stop_sequence`), to find the -following record: - -``` -South Station Silver Line - Inbound|21:17:00 -``` - -This means the scheduled arrival time for South Station Silver Line is -9:17:00 PM. Since the estimate indicates a delay of 30 seconds, the new -arrival time is 9:17:30 PM. 
- -***Note:** Conversely, if the delay was -30 instead of 30, the vehicle -would be early and arrive at 9:16:30 PM.* - -One thing not touched upon in this example is the stop time's schedule -relationship. The `gtfs_rt_trip_updates_stoptimes` also includes a -column called `rship`, which is used to indicate the schedule -relationship for the given stop. If this value was skipped (a value of -`1`), then it means the vehicle will not stop here. - -### Determining Predictions Using Blocks - -In GTFS, the `block_id` value in **trips.txt** is used to indicate a -series of one or more trips undertaken by a single vehicle. In other -words, once it gets to the end of one trip, it starts a new trip from -that location. If a given trip is very short, a vehicle may perform up -to fifty or one hundred trips in a single day. - -This can be useful for determining estimates for future trips that may -not be included in the GTFS-realtime feed. For instance, if a trip is running 30 -minutes late, then it is highly likely that subsequent trips on that -block will also be running late. - -Referring back to the trip data returned in the above example, the -following data can be retrieved from the GTFS feed: - -```sql -SELECT trip_id, block_id, service_id, departure_time, arrival_time - FROM trips - WHERE trip_id IN ('25900860', '25900856', '25900858') - ORDER BY departure_time; -``` - -This returns the following data. - -| `trip_id` | `block_id` | `service_id` | `departure_time` | `arrival_time` | -| :-------- | :--------- | :--------------------------- | :--------------- | :------------- | -| 25900860 | S742-61 | BUSS12015-hbs15017-Sunday-02 | 21:04:00 | 21:17:00 | -| 25900856 | S742-61 | BUSS12015-hbs15017-Sunday-02 | 21:18:00 | 21:28:00 | -| 25900858 | S742-61 | BUSS12015-hbs15017-Sunday-02 | 21:35:00 | 21:48:00 | - -Each of these trips has the same values for `block_id` and -`service_id`, meaning the same vehicle will complete all three trips.
-The data indicates that the first trip is running 30 seconds late, so -its arrival time will be 21:17:30. - -Because the second trip is scheduled to depart at 21:18:00, there is a -buffer time to catch up (in other words, the first trip is only 30 -seconds late, so hopefully it will not impact the second trip). - -If, for instance, the second trip ran 10 minutes late (and therefore -arrived at 21:38:00 instead of 21:28:00), you could reasonably assume -the third trip would begin at about 21:38:00 instead of 21:35:00 (about -three minutes late). - -### Querying Service Alerts - -When using `GtfsRealTimeToSql` to store service alerts, data is stored -in three tables. The main service alert information is stored in -`gtfs_rt_alerts`. Since there can be any number of time ranges or -affected entities for any given alert, there is a table to hold time -ranges (`gtfs_rt_alerts_timeranges`) and one to hold affected entities -(`gtfs_rt_alerts_entities`). - -In order to link this data together, each alert has an `alert_id` -value (created by `GtfsRealTimeToSql`) which is also present for any -corresponding time range and affected entities records. - -To retrieve service alerts from the database, you can use the following -query: - -```sql -SELECT alert_id, header, description, cause, effect FROM gtfs_rt_alerts; -``` - -At the time of writing, this yields the following results. The -description has been shortened as they are quite long and descriptive in -the original feed. - -| `alert_id` | `header` | `description` | `cause` | `effect` | -| :--------- | :--------------------------- | :--------------------------------------------------------- | :------- | :------- | -| 6 | Route 11 detour | Route 11 outbound detoured due to ... | 1 | 4 | -| 11 | Ruggles elevator unavailable | ... Commuter Rail platform to the lobby is unavailable ... 
| 9 | 7 | -| 33 | Extra Franklin Line service | | 1 | 5 | - -**Note:** A cause value of 1 corresponds to UNKNOWN_CAUSE, while 9 -corresponds to MAINTENANCE. An effect value of 4 corresponds to DETOUR, -7 corresponds to OTHER_EFFECT, while 5 corresponds to -ADDITIONAL_SERVICE. - -To determine the timing of these alerts, look up the -`gtfs_rt_alerts_timeranges` for the given alerts. - -``` -$ export TZ=America/New_York - -$ sqlite3 db.sqlite - -sqlite> SELECT alert_id, datetime(start, 'unixepoch', 'localtime'), datetime(finish, 'unixepoch', 'localtime') - FROM gtfs_rt_alerts_timeranges - WHERE alert_id IN (6, 11, 33); -``` - -***Note:** In this example, the system timezone has been temporarily -changed to America/New_York (the timezone specified in MBTA's -agency.txt file) so the timestamps are formatted correctly. You may -prefer instead to format the start and finish timestamps using your -programming language of choice.* - -| `alert_id` | `start` (formatted) | `finish` (formatted) | -| :--------- | :------------------ | :------------------- | -| 6 | 2015-02-17 15:56:52 | | -| 11 | 2015-03-18 06:00:00 | 2015-03-18 16:00:00 | -| 11 | 2015-03-19 06:00:00 | 2015-03-19 16:00:00 | -| 33 | 2015-03-16 10:58:38 | 2015-03-18 02:30:00 | - -These dates indicate the following: - -* The alert with an ID of 6 began on February 17 and has no specified end date. -* The alert with an ID of 11 will occur over two days between 6 AM and 4 PM. -* The alert with an ID of 33 will last for almost two days. - -Finally, to determine which entities (routes, trips, stops) are affected -by these alerts, query the `gtfs_rt_alerts_entities` table. - -```sql -SELECT alert_id, agency_id, route_id, route_type, stop_id, trip_id, trip_start_date, trip_start_time, trip_rship - FROM gtfs_rt_alerts_entities WHERE alert_id IN (6, 11, 33); -``` - -This results in the following data, describing only an entity for the -final service alert.
- -| `alert_id` | `agency_id` | `route_id` | `route_type` | `stop_id` | `trip_id` | `trip_start_date` | `trip_start_time` | `trip_rship` | -| :--------- | :---------- | :---------- | :----------- | :-------- | :-------- | :---------------- | :---------------- | :----------- | -| 33 | 1 | CR-Franklin | 2 | | | | | | - -Using the `route_id` value, you can find the route from the GTFS feed: - -```sql -SELECT agency_id, route_type, route_long_name, route_desc - FROM routes WHERE route_id = 'CR-Franklin'; -``` - -This query will result in the following data: - -| `agency_id` | `route_type` | `route_long_name` | `route_desc` | -| :---------- | :----------- | :---------------- | :------------ | -| 1 | 2 | Franklin Line | Commuter Rail | - -In effect, this means that if you have a web site or app displaying the -schedule for the Franklin line, this alert should be displayed so the -people who use the line are aware of the change. - -***Note:** Even though the `agency_id` and `route_type` values are included, -you do not really need these since the `route_id` can only refer to one -row in the GTFS feed. If instead you wanted to refer to ALL rail lines, -the alert would have the `route_id` value blank but keep the `route_type` -value.* - diff --git a/.idea/gtfs-realtime-book/ch-10-extensions.md b/.idea/gtfs-realtime-book/ch-10-extensions.md deleted file mode 100644 index 993814c..0000000 --- a/.idea/gtfs-realtime-book/ch-10-extensions.md +++ /dev/null @@ -1,374 +0,0 @@ -## 10. GTFS-realtime Extensions - -*Introduction to gtfs-realtime.proto* showed you an extract -from the `gtfs-realtime.proto` file that is used as an input to the -Protocol Buffers `protoc` command. - -One of the lines in this extract included the following `extensions` -directive: - -``` -extensions 1000 to 1999; -``` - -Each element in a Protocol Buffers entity must be assigned a value.
This -is the value used to represent the element in the binary protocol buffer -stream (for instance, the `trip` element was assigned a value of -`1`). The `extensions` directive reserves the values between 1000 -and 1999 for external use. - -Having these values reserved means that transit agencies are free to -include additional information in their own GTFS-realtime feeds. In this -instance, the agency provides its own `proto` file to use with -`protoc` that builds on the original `gtfs-realtime.proto` file. - -At the time of writing, the following extensions are defined: - -| Extension ID | Developer | -| :----------- | :-------- | -| 1000 | OneBusAway | -| 1001 | New York City MTA | -| 1002 | Google | -| 1003 | OVapi | -| 1004 | Metra | - -You can find this list at -). As more -agencies release GTFS-realtime feeds this list will grow, since many -agencies have specific requirements in the way their internal systems -work, as well as to facilitate providing data in a way that is -understood by users of the given transport system. - -To demonstrate how extensions are specified and used, the following case -study will show you the extension for the New York City subway system. - -### Case Study: New York City Subway - -One of the agencies that provides an extension is New York City MTA. -They add a number of custom fields to their subway GTFS-realtime feeds. - -The following is a slimmed-down version of their Protocol Buffers -definition file, available from -. 
- -``` -option java_package = "com.google.transit.realtime"; - -import "gtfs-realtime.proto"; - -message TripReplacementPeriod { - optional string route_id = 1; - optional transit_realtime.TimeRange replacement_period = 2; -} - -message NyctFeedHeader { - required string nyct_subway_version = 1; - repeated TripReplacementPeriod trip_replacement_period = 2; -} - -extend transit_realtime.FeedHeader { - optional NyctFeedHeader nyct_feed_header = 1001; -} - -message NyctTripDescriptor { - optional string train_id = 1; - optional bool is_assigned = 2; - - enum Direction { - NORTH = 1; - EAST = 2; - SOUTH = 3; - WEST = 4; - } - - optional Direction direction = 3; -} - -extend transit_realtime.TripDescriptor { - optional NyctTripDescriptor nyct_trip_descriptor = 1001; -} - -// NYCT Subway extensions for the stop time update - -message NyctStopTimeUpdate { - optional string scheduled_track = 1; - optional string actual_track = 2; -} - -extend transit_realtime.TripUpdate.StopTimeUpdate { - optional NyctStopTimeUpdate nyct_stop_time_update = 1001; -} -``` - -This file begins by importing the original `gtfs-realtime.proto` file -that was examined in *Introduction to gtfs-realtime.proto*. - -It then defines a number of new element types (they choose to prefix -them using `Nyct`, although the only important thing here is that they -don't conflict with the names from `gtfs-realtime.proto`). - -This definition file then extends the `FeedHeader`, `TripDescriptor` and -`StopTimeUpdate` element types. Values -`1000` to `1999` are reserved for custom extensions. This is the -reason why the added `nyct_feed_header`, `nyct_trip_descriptor` and -`nyct_stop_time_update` fields use numbers in this range. - -### Compiling an Extended Protocol Buffer - -In order to make use of these extended fields, you first need to -download and compile the `nyct-subway.proto` file into the same -directory as `gtfs-realtime.proto` (*Compiling gtfs-realtime.proto*).
- -``` -$ cd /path/to/protobuf - -$ curl \ - http://datamine.mta.info/sites/all/files/pdfs/nyct-subway.proto.txt \ - -o nyct-subway.proto -``` - -You can now build a Java class similar to before, but using the -`nyct-subway.proto` file as the input instead of -`gtfs-realtime.proto`: - -``` -$ protoc \ - --proto_path=/path/to/protobuf \ - --java_out=/path/to/gtfsrt/src \ - /path/to/protobuf/nyct-subway.proto -``` - -If this command executes successfully, you will now have a file called -`NyctSubway.java` in the **./src/com/google/transit/realtime** -directory (in addition to `GtfsRealtime.java`, which is still -required). - -The next section will show you how to use this class. - -### Registering Extensions - -The process to load an extended GTFS-realtime is the same as a regular -feed (in that you call **parseFrom()** to build the `FeedMessage` -object), but first you must register the extensions. - -In Java, this is achieved using the `ExtensionRegistry` class and the -**registerAllExtensions()** helper method. - -``` -import com.google.protobuf.ExtensionRegistry; - -... - -ExtensionRegistry registry = ExtensionRegistry.newInstance(); - -NyctSubway.registerAllExtensions(registry); -``` - -The `ExtensionRegistry` object is then passed to **parseFrom()** as -the second argument. The following code shows how to open and read the -main New York City subway feed. - -***Note:** A developer API key is required to access the MTA -GTFS-realtime feeds. 
You can register for a key at -[http://datamine.mta.info](http://datamine.mta.info/), then substitute -it into the key variable.* - -```java -public class YourClass { - public void loadFeed() throws IOException { - - String key = "*YOUR_KEY*"; - - URL url = new URL("http://datamine.mta.info/mta_esi.php?feed_id=1&key=" + key); - - InputStream is = url.openStream(); - - ExtensionRegistry registry = ExtensionRegistry.newInstance(); - - NyctSubway.registerAllExtensions(registry); - - FeedMessage fm = FeedMessage.parseFrom(is, registry); - - is.close(); - - // ... - - } -} -``` - -Once the feed has been parsed, you will no longer need to pass the -extension registry around. - -### Accessing Extended Protocol Buffer Elements - -Earlier it was explained that the general paradigm with reading -GTFS-realtime data is to check the existence of a field using -`hasFieldName()`, and to retrieve the value using -`getFieldName()`. - -This also applies to retrieving extended elements, but instead of there -being built-in methods for each extended field type, use -`hasExtension(fieldName)` and `getExtension(fieldName)`. - -The argument passed to `hasExtension()` and `getExtension()` is a -unique identifier for that field. The identifier is a static property of -the `NyctSubway` class. For example, the `nyct_trip_descriptor` -extended field has a unique identifier of -`NyctSubway.nyctTripDescriptor`. - -Since this field is an extension to the standard `TripDescriptor` -field, you can check for its presence as follows: - -```java -TripUpdate tripUpdate = entity.getTripUpdate(); - -TripDescriptor td = tripUpdate.getTrip(); - -if (td.hasExtension(NyctSubway.nyctTripDescriptor)) { - // ... -} -``` - -The `NyctSubway.nyctTripDescriptor` identifier corresponds to a field -of the created class `NyctTripDesciptor`, meaning that you can -retrieve its value and assign it directly to the class type. 
- -```java -NyctTripDescriptor nyctTd = td.getExtension(NyctSubway.nyctTripDescriptor); -``` -You can now access the values directly from this instance of -`NyctTripDescriptor`. For example, one of the added fields is -direction, which indicates whether the general direction of the train is -North, South, East or West. The following code shows how to use this -value: - -```java -if (nyctTd.hasDirection()) { - Direction direction = nyctTd.getDirection(); - - switch (direction.getNumber()) { - case Direction.NORTH_VALUE: // Northbound train - break; - - case Direction.SOUTH_VALUE: // Southbound train - break; - - case Direction.EAST_VALUE: // Eastbound train - break; - - case Direction.WEST_VALUE: // Westbound train - break; - - } -} -``` - -Similarly, you can access other extended fields, either from the -`NyctTripDescriptor` object, or from an instance of the -`NyctStopTimeUpdate` extension. - -### GTFS-realtime Extension Complete Example - -Piecing together the snippets from this case study, the following code -shows in context how you can access the extra fields such as the -train's direction and identifier: - -```java -public class NyctProcessor { - - // Loads and processes the feed - public void process(String apiKey) throws IOException { - - URL url = new URL("http://datamine.mta.info/mta_esi.php?feed_id=1&key=" + apiKey); - - InputStream is = url.openStream(); - - // Register the NYC-specific extensions - - ExtensionRegistry registry = ExtensionRegistry.newInstance(); - - NyctSubway.registerAllExtensions(registry); - - FeedMessage fm = FeedMessage.parseFrom(is, registry); - - // Loop over all entities - - for (FeedEntity entity : fm.getEntityList()) { - // In this example only trip updates are processed - - if (entity.hasTripUpdate()) { - processTripUpdate(entity.getTripUpdate()); - } - } - } - - // Used to process a single trip update - public void processTripUpdate(TripUpdate tripUpdate) { - - if (tripUpdate.hasTrip()) { - - TripDescriptor td = 
tripUpdate.getTrip(); - - // Check if the extended trip descriptor is available - - if (td.hasExtension(NyctSubway.nyctTripDescriptor)) { - NyctTripDescriptor nyctTd = td.getExtension(NyctSubway.nyctTripDescriptor); - - processNyctTripDescriptor(nyctTd); - } - } - } - - // Process a single extended trip descriptor - public void processNyctTripDescriptor(NyctTripDescriptor nyctTd) { - - // If the train ID is specified, output it - if (nyctTd.hasTrainId()) { - String trainId = nyctTd.getTrainId(); - - System.out.println("Train ID: " + trainId); - } - - // If the direction is specified, output it - - if (nyctTd.hasDirection()) { - Direction direction = nyctTd.getDirection(); - - String directionLabel = null; - - switch (direction.getNumber()) { - case Direction.NORTH_VALUE: - directionLabel = "North"; - break; - - case Direction.SOUTH_VALUE: - directionLabel = "South"; - break; - - case Direction.EAST_VALUE: - directionLabel = "East"; - break; - - case Direction.WEST_VALUE: - directionLabel = "West"; - break; - - default: - directionLabel = "Unknown Value"; - } - - System.out.println("Direction: " + directionLabel); - } - } -} -``` - -After you invoke the `process()` method, your output should be similar -to the following: - -``` -Direction: North - -Train ID: 06 0139+ PEL/BBR -``` diff --git a/.idea/gtfs-realtime-book/ch-11-publishing-feeds.md b/.idea/gtfs-realtime-book/ch-11-publishing-feeds.md deleted file mode 100644 index 2569f57..0000000 --- a/.idea/gtfs-realtime-book/ch-11-publishing-feeds.md +++ /dev/null @@ -1,348 +0,0 @@ -## 11. Publishing GTFS-realtime Feeds - -So far this book has been focused on how to consume GTFS-realtime feeds; -in this chapter you will be shown how to create and publish your own -GTFS-realtime feeds. - -While this chapter is primarily intended for transit agencies (or -third-party companies providing services to public transit companies), -this information can be useful in other situations also. 
- -Even if you do not represent a transit agency or have access to the GPS -units of an entire bus fleet, there may still be situations where you -want to produce a GTFS feed. For example, if you have a trip planning -server that can only handle GTFS and GTFS-realtime data, you might build -your own GTFS-realtime feeds in the following situations: - -* A transit company offers service alerts only via Twitter or an RSS - feed. -* You can access vehicle positions or estimated arrivals from a feed - in a format such as SIRI, NextBus or BusTime. -* You have interpolated your own vehicle positions based on - GTFS-realtime trip updates. -* You have interpolated your own trip updates based on vehicle - positions. - -### Building Protocol Buffer Elements - -When you generate source files using the `protoc` command, there is a -*builder* class created for each element type. To create an element to -include in a protocol buffer, you use its builder to construct the -element. - -For example, a service alert entity uses the `Alert` class. To -construct your own service alert, you would use the `Alert.Builder` -class. The `Alert` class contains a static method called -`newBuilder()` to create an instance of `Alert.Builder`. - -```java -Alert.Builder alert = Alert.newBuilder(); -``` - -You can now set the various elements that describe a service alert. - -```java -alert.setCause(Cause.ACCIDENT); - -alert.setEffect(Effect.DETOUR); -``` - -Most elements will be more complex than this; you will need to build -them in a similar manner before adding them to the alert. For example, -the header text for a service alert uses the `TranslatedString` -element type, which contains one or more translations of a single -string. 
- -```java -Translation.Builder translation = Translation.newBuilder(); - -translation.setText("Car accident"); - -TranslatedString.Builder translatedString = TranslatedString.newBuilder(); - -translatedString.addTranslation(translation); - -alert.setHeaderText(translatedString); -``` - -In actual fact, you can chain together these calls, since the builder -methods return the builder. The first two lines of the above code can be -shortened as follows: - -```java -Translation.Builder translation = Translation.newBuilder().setText("Car accident"); -``` - -For repeating elements (such as the `informed_entity` field), use the -`addElementName()` method. In the case of `informed_entity`, -this would be `addInformedEntity()`. The following code adds an -informed entity to the alert for a route with an ID of 102: - -```java -EntitySelector.Builder entity = EntitySelector.newBuilder().setRouteId("102"); - -alert.addInformedEntity(entity); -``` - -### Creating a Complete Protocol Buffer - -The previous section showed the basics of creating a service alert -message, but a protocol buffer feed has more to it than just a single -entity. It can have multiple entities, and you must also include the -GTFS-realtime header. The header can be created as follows: - -```java -FeedHeader.Builder header = FeedHeader.newBuilder(); - -header.setGtfsRealtimeVersion("1.0"); -``` - -A single service alert (or a trip update, or a vehicle position) is -contained within a `FeedEntity` object. Each `FeedEntity` in a feed -must have a unique ID. The following code creates the `FeedEntity` -using the `alert` object created in the previous section. 
- -```java -FeedEntity.Builder entity = FeedEntity.newBuilder(); - -entity.setId("SOME UNIQUE ID"); - -entity.setAlert(alert); -``` - -Once you have the header and an entity you can create the feed as -follows: - -```java -FeedMessage.Builder message = FeedMessage.newBuilder(); - -message.setHeader(header); - -message.addEntity(entity); -``` - -***Note**: A feed with no entities is also valid; in the middle of the night -there may be no vehicle positions or trip updates, and there may -frequently be no service alerts.* - -Once you have created this object, you can turn it into a -`FeedMessage` by calling `build()`. - -``` -FeedMessage feed = message.build(); -``` - -This will give you a `FeedMessage` object just like when you parse a -third-party feed using `FeedMessage.parseFrom()`. - -### Full Source Code - -Piecing together all of the code covered so far in this chapter, you -could create a service alert feed (using a fictional detour) using the -following code. - -This example makes use of a helper method to build translated strings, -since it needs to be done a number of times. If you want to create the -alert in multiple languages, you would need to change this method -accordingly. 
- -```java -public class SampleServicesAlertsFeedCreator { - // Helper method to simplify creation of translated strings - - private TranslatedString translatedString(String str) { - Translation.Builder translation = Translation.newBuilder().setText(str); - - return TranslatedString.newBuilder().addTranslation(translation).build(); - } - - public FeedMessage create() { - Alert.Builder alert = Alert.newBuilder(); - - alert.setCause(Cause.ACCIDENT); - alert.setEffect(Effect.DETOUR); - alert.setUrl(translatedString("http://www.example.com")); - alert.setHeaderText(translatedString("Car accident on 14th Street")); - - alert.setDescriptionText(translatedString( - "Please be aware that 14th Street is closed due to a car accident" - )); - - // Loop over several route IDs to mark them as impacted - - String impactedRouteIds[] = { "102", "103" }; - - for (int i = 0; i < impactedRouteIds.length; i++) { - EntitySelector.Builder entity = EntitySelector.newBuilder(); - - entity.setRouteId(impactedRouteIds[i]); - - alert.addInformedEntity(entity); - } - - // Create the alert container entity - - FeedEntity.Builder entity = FeedEntity.newBuilder(); - - entity.setId("1"); - entity.setAlert(alert); - - // Build the feed header - - FeedHeader.Builder header = FeedHeader.newBuilder(); - - header.setGtfsRealtimeVersion("1.0"); - - // Build the feed using the header and entity - - FeedMessage.Builder message = FeedMessage.newBuilder(); - - message.setHeader(header); - message.addEntity(entity); - - // Return the built FeedMessage - return message.build(); - } -} -``` - -### Modifying an Existing Protocol Buffer - -In some circumstances you might want to modify an existing protocol -buffer. For example, consider a case where you have access to a service -alerts feed, but also want to add additional service alerts that you -parsed from Twitter. 
The following diagram demonstrates this: - -![Modified Feed](images/modified-feed.png) - -In this case, you can turn a `FeedMessage` object into a -`FeedMessage.Builder` object by calling to the `toBuilder()`` method. -You can then add additional alerts as required and create a new feed. - -```java -// Parse some third-party feed - -URL url = new URL("http://example.com/alerts.pb"); - -InputStream is = url.openStream(); - -FeedMessage message = GtfsRealtime.FeedMessage.parseFrom(is); - -// Convert existing feed into a builder - -FeedMessage.Builder builder = message.toBuilder(); - -Alert.Builder alert = Alert.newBuilder(); - -// Add the details of the alert here - -// Create the alert entity - -FeedEntity.Builder entity = FeedEntity.newBuilder(); - -entity.setId("SOME ID"); -entity.setAlert(alert); - -// Add the new entity to the builder -builder.addEntity(entity); - -// Build the update the FeedMessage -message = builder.build(); -``` - -### Saving a Protocol Buffer File - -Once you have created a protocol buffer, the next step is to output it -so other systems that read GTFS-realtime feeds can consume it (such as -for others who publish real-time data in their apps or web sites). - -Typically, you would generate a new version of the feed every `X` -seconds, then save (or upload) it each time to your web server (see the -next section for discussion on frequency of updates). - -The raw protocol buffer bytes can be output using the `writeTo()` -method on the `FeedMessage` object. This method accepts an -`OutputStream` object as its only argument. - -For example, to output the service alerts feed created in this chapter -to a file, you can use the `FileOutputStream` class. - -Note: While there are no specific rules for naming a protocol buffer, -often the `.pb` extension is used. 
- -```java -SampleServicesAlertsFeedCreator creator = new SampleServicesAlertsFeedCreator(); - -FeedMessage message = creator.create(); - -File file = new File("/path/to/output/alerts.pb"); - -OutputStream outputStream = new FileOutputStream(file); - -message.writeTo(outputStream); -``` - -### Serving a Protocol Buffer File - -The recommended content type header value to use when serving a Protocol -Buffer file is **application/octet-stream**. In Apache HTTP Server, you -can set the following configuration parameter to serve `.pb` files -with this content type: - -``` -AddType application/octet-stream .pb -``` - -If you are using nginx for your web server, you can add the following -entry to the nginx `mime.types` file: - -``` -types { - ... - - application/octet-stream pb; -} -``` - -### Frequency of Updates - -When publishing your own feed, the frequency in which you update the -feed on your web server depends on how frequently the source data is -updated. - -It is important to take into account the capabilities of your servers -when providing a GTFS-realtime feed, as the more frequently the data is -updated, the more resources that are required. Each of the GTFS-realtime -message types has slightly different needs: - -* **Vehicle Positions.** These will need updating very frequently, as - presumably the vehicles on your transit network are always moving. A - vehicle position feed could update as frequently as every 10-15 - seconds. -* **Trip Updates.** These will need updating very frequently, although - perhaps not as frequently as vehicle positions. Estimates would - constantly be refined by new vehicle positions, but a single - movement (or lack of movement) for a vehicle is not likely to make a - huge difference to estimates. A trip updates feed could update every - 10-30 seconds. -* **Service Alerts.** These will typically change far less frequently - then vehicle positions or trip updates. 
A system that triggered an - update to the service alerts feed only when a new alert was entered - into the system would be far more efficient than automatically doing - it every `X` seconds. - -To summarize: - -- **Vehicle Positions.** Update every 10-15 seconds. -- **Trip Updates.** Update every 10-30 seconds. -- **Service Alerts.** Triggered on demand when new data is available. - -If your transit agency does not run all night, an additional efficiency -would be to not update the feed at all when the network has shut down -for the night. - -In this case, once the last trip has finished, an empty protocol buffer -would be uploaded (that is, a valid buffer but with no entities), and -the next version would not be uploaded until the next morning when the -first trip starts. - diff --git a/.idea/gtfs-realtime-book/ch-12-conclusion.md b/.idea/gtfs-realtime-book/ch-12-conclusion.md deleted file mode 100644 index a6092af..0000000 --- a/.idea/gtfs-realtime-book/ch-12-conclusion.md +++ /dev/null @@ -1,29 +0,0 @@ -## Conclusion - -Thanks for reading *The Definitive Guide to GTFS-realtime*. I wrote this -book with the intention of providing a comprehensive guide to getting -started with consuming real-time data using the GTFS-realtime -specification. - -One of the biggest advantages GTFS-realtime has over other real-time -specifications or services (such NextBus or SIRI) is that it is designed -to complement GTFS feeds by sharing a common set of identifiers. - -On one hand, GTFS-realtime is very straightforward, as it provides only -three different types of messages (service alerts, vehicle positions and -trip updates), but there are a number of complexities involved in -getting started with both consuming and producing GTFS-realtime feeds, -such as setting up Protocol Buffers to read feeds, or understanding the -intent of trip updates. - -The key takeaways from this book are: - -* The different types of real-time data available in GTFS-realtime feeds. 
-* How to read data from a GTFS-realtime feed. -* How to apply the data you read from GTFS-realtime feeds to your own applications. -* They main concepts behind producing and publishing your own GTFS-realtime feeds. - -If you have enjoyed this book, please share it or feel free to contribution improvements or changes as necessary. - -*Quentin Zervaas* -August, 2015 diff --git a/.idea/gtfs-realtime-book/images/GTFS-realtime-consumption.png b/.idea/gtfs-realtime-book/images/GTFS-realtime-consumption.png deleted file mode 100644 index 83fee1a..0000000 Binary files a/.idea/gtfs-realtime-book/images/GTFS-realtime-consumption.png and /dev/null differ diff --git a/.idea/gtfs-realtime-book/images/GTFS-realtime-direct-or-intermediate.png b/.idea/gtfs-realtime-book/images/GTFS-realtime-direct-or-intermediate.png deleted file mode 100644 index 8375211..0000000 Binary files a/.idea/gtfs-realtime-book/images/GTFS-realtime-direct-or-intermediate.png and /dev/null differ diff --git a/.idea/gtfs-realtime-book/images/GTFS-realtime-structure.png b/.idea/gtfs-realtime-book/images/GTFS-realtime-structure.png deleted file mode 100644 index c1308c8..0000000 Binary files a/.idea/gtfs-realtime-book/images/GTFS-realtime-structure.png and /dev/null differ diff --git a/.idea/gtfs-realtime-book/images/Modified-Feed.png b/.idea/gtfs-realtime-book/images/Modified-Feed.png deleted file mode 100644 index 11ce531..0000000 Binary files a/.idea/gtfs-realtime-book/images/Modified-Feed.png and /dev/null differ diff --git a/.idea/gtfs-realtime-book/images/bearing-from-position.png b/.idea/gtfs-realtime-book/images/bearing-from-position.png deleted file mode 100644 index 0712a9c..0000000 Binary files a/.idea/gtfs-realtime-book/images/bearing-from-position.png and /dev/null differ diff --git a/.idea/gtfs-realtime-book/images/boston-trip.png b/.idea/gtfs-realtime-book/images/boston-trip.png deleted file mode 100644 index 60ff67b..0000000 Binary files a/.idea/gtfs-realtime-book/images/boston-trip.png and 
/dev/null differ diff --git a/.idea/gtfs-realtime-book/images/mbta_bus.png b/.idea/gtfs-realtime-book/images/mbta_bus.png deleted file mode 100644 index a97a94d..0000000 Binary files a/.idea/gtfs-realtime-book/images/mbta_bus.png and /dev/null differ diff --git a/.idea/gtfs-realtime-book/images/remaining_stops.png b/.idea/gtfs-realtime-book/images/remaining_stops.png deleted file mode 100644 index 277f401..0000000 Binary files a/.idea/gtfs-realtime-book/images/remaining_stops.png and /dev/null differ diff --git a/.idea/gtfs-realtime-book/images/stoptime_map.png b/.idea/gtfs-realtime-book/images/stoptime_map.png deleted file mode 100644 index 54a0c33..0000000 Binary files a/.idea/gtfs-realtime-book/images/stoptime_map.png and /dev/null differ diff --git a/.idea/gtfs-realtime-book/images/vehicle-position.png b/.idea/gtfs-realtime-book/images/vehicle-position.png deleted file mode 100644 index 5920ba7..0000000 Binary files a/.idea/gtfs-realtime-book/images/vehicle-position.png and /dev/null differ diff --git a/gtfs-book/ch-00-definitive-guide-to-gtfs.md b/gtfs-book/ch-00-definitive-guide-to-gtfs.md new file mode 100644 index 0000000..2c32c67 --- /dev/null +++ b/gtfs-book/ch-00-definitive-guide-to-gtfs.md @@ -0,0 +1,55 @@ +# The Definitive Guide to GTFS + +*Consuming open public transportation data with the General Transit Feed +Specification* + +Originally written by Quentin Zervaas. + +## About This Book + +This book is a comprehensive guide to GTFS -- the General Transit Feed +Specification. It is comprised of two main sections. + +The first section describes what GTFS is and provides details about the +specification itself. In addition to this it also provides various +discussion points and things to consider for each of the files in the +specification. + +The second section covers a number of topics that relate to actually +using GTFS feeds, such as how to calculate fares, how to search for +trips, how to optimize feed data and more. 
+ +This book is written for developers that are using transit data for web +sites, mobile applications and more. It aims to be as language-agnostic +as possible, but uses SQL to demonstrate concepts of extracting data +from a GTFS feed. + +### About The Author + +Quentin Zervaas is a software developer from Adelaide, Australia. + +Quentin was the founder of TransitFeeds (now ), a web +site that provides a comprehensive listing of public transportation data +available around the world. This site is referenced various times +throughout this book. + +### Credits + +First Edition. Published in February 2014. + +**Technical Reviewer** + +Rupert Hanson + +**Copy Editor** + +Miranda Little + +**Disclaimer** + +The information in this book is distributed on an "as is" basis, without +warranty. Although every precaution has been taken in the preparation of +this work, the author shall not be liable to any person or entity with +respect to any loss or damage caused or alleged to be caused directly or +indirectly by the information contained in this book. + diff --git a/gtfs-book/ch-01-introduction.md b/gtfs-book/ch-01-introduction.md new file mode 100644 index 0000000..3b3765c --- /dev/null +++ b/gtfs-book/ch-01-introduction.md @@ -0,0 +1,45 @@ +## 1. Introduction to GTFS + +GTFS (General Transit Feed Specification) is a data standard developed +by Google used to describe a public transportation system. Its primary +purpose was to enable public transit agencies to upload their schedules +to Google Transit so that users of Google Maps could easily figure out +which bus, train, ferry or otherwise to catch. + +> **A GTFS feed is a ZIP file that contains a series of CSV files that +list routes, stops and trips in a public transportation system.** + +This book examines GTFS in detail, including which data from a public +transportation system can be represented, how to extract data, and +explores some more advanced techniques for optimizing and querying data. 
+ +The official GTFS specification has been referenced a number of times in +this book. It is strongly recommended you are familiar with it. You can +view it at the following URL: + + + +### Structure of a GTFS Feed + +A GTFS feed is a series of CSV files, which means that it is trivial to +include additional files in a feed. Additionally, files required as part +of the specification can also include additional columns. For this +reason, feeds from different agencies generally include different levels +of detail. + +***Note:** The files in a GTFS feed are CSV files, but use a file +extension of `.txt`.* + +A GTFS feed can be described as follows: + +> **A GTFS feed has one or more routes. Each route (`routes.txt`) has one or +more trips (`trips.txt`). Each trip visits a series of stops (`stops.txt`) +at specified times (`stop_times.txt`). Trips and stop times only contain +time of day information; the calendar is used to determine on which days +a given trip runs (`calendar.txt` and `calendar_dates.txt`).** + +The following chapters cover the main files that are included in all +GTFS feeds. For each file, the main columns are covered, as well as +optional columns that can be included. This book also covers some of the +unofficial columns that some agencies choose to include. + diff --git a/gtfs-book/ch-02-agencies.md b/gtfs-book/ch-02-agencies.md new file mode 100644 index 0000000..cb837ae --- /dev/null +++ b/gtfs-book/ch-02-agencies.md @@ -0,0 +1,47 @@ +## 2. Agencies (agency.txt) + +*This file is ***required*** to be included in GTFS feeds.* + +The `agency.txt` file is used to represent the agencies that provide +data for this feed. While its presence is optional, if there are routes +from multiple agencies included, then records in `routes.txt` make +reference to agencies in this file. + +| Field | Required?
| Description | +| :----------------------------------------------------- | :--------: | :-------- | +| `agency_id` | Optional | An ID that uniquely identifies a single transit agency in the feed. If a feed only contains routes for a single agency then this value is optional. | +| `agency_name` | Required | The full name of the transit agency. | +| `agency_url` | Required | The URL of the transit agency. Must be a complete URL only, beginning with `http://` or `https://`. | +| `agency_timezone` | Required | Time zone of agency. All times in `stop_times.txt` use this time zone, unless overridden by its corresponding stop. All agencies in a single feed must use the same time zone. Example: **America/New_York** (See for more examples) | +| `agency_lang` | Required | Contains a two-letter ISO-639-1 code (such as `en` or `EN` for English) for the language used in this feed. | +| `agency_phone` | Optional | A single voice telephone number for the agency that users can dial if required. | +| `agency_fare_url` | Optional | A URL that describes fare information for the agency. Must be a complete URL only, beginning with `http://` or `https://`. | + +### Sample Data + +The following extract is taken from the GTFS feed of TriMet (Portland, +USA), located at . + +| `agency_name` | `agency_url` | `agency_timezone` | `agency_lang` | `agency_phone` | +| :------------ | :------------------------------------------- | :-------------------- | :------------ | :--------------- | +| `TriMet` | `[https://trimet.org](https://trimet.org/)` | `America/Los_Angeles` | `en` | `(503) 238-7433` | + +In this example, the `agency_id` column is included, but as there is +only a single entry the value can be empty. This means the `agency_id` +column in `routes.txt` also is not required.
+ +### Discussion + +The data in this file is typically used to provide additional +information to users of your app or web site in case schedules derived +from the rest of this feed are not sufficient (or in the case of +`agency_fare_url`, an easy way to provide a reference point to users +if the fare information in the feed is not being used). + +If you refer to the following screenshot, taken from Google Maps, you +can see the information from `agency.txt` represented in the +lower-left corner as an example of how it can be used. + +![GTFS agency](images/agency-google-maps.png) + + diff --git a/gtfs-book/ch-03-stops.md b/gtfs-book/ch-03-stops.md new file mode 100644 index 0000000..cd8dedb --- /dev/null +++ b/gtfs-book/ch-03-stops.md @@ -0,0 +1,117 @@ +## 3. Stops & Stations (stops.txt) + +*This file is ***required*** to be included in GTFS feeds.* + +The individual locations where vehicles pick up or drop off passengers +are represented by `stops.txt`. Records in this file are referenced in +`stop_times.txt`. A record in this file can be either a stop or a +station. A station has one or more child stops, as indicated using the +`parent_station` value. Entries that are marked as stations may not +appear in `stop_times.txt`. + +| Field | Required? | Description | +| :----- | :-------- | :---------- | +| `stop_id` | Required | An ID to uniquely identify a stop or station. | +| `stop_code` | Optional | A number or short string used to identify a stop to passengers. This is typically displayed at the physical stop or on printed schedules. | +| `stop_name` | Required | The name of the stop as passengers know it by. | +| `stop_desc` | Optional | A description of the stop. If provided, this should provide additional information to the `stop_name` value. | +| `stop_lat` | Required | The latitude of the stop (a number in the range of `-90` to `90`). | +| `stop_lon` | Required | The longitude of the stop (a number in the range of `-180` to `180`). 
| +| `zone_id` | Optional | This is an identifier used to calculate fares. A single zone ID may appear in multiple stops, but is ignored if the stop is marked as a station. | +| `stop_url` | Optional | A URL that provides information about this stop. It should be specific to this stop and not simply link to the agency's web site. | +| `location_type` | Optional | Indicates if a record is a stop or station. `0` or blank means a stop, `1` means a station. | +| `parent_station` | Optional | If a record is marked as a stop and has a parent station, this contains the ID of the parent (the parent must have a `location_type` of `1`). | +| `stop_timezone` | Optional | If a stop is located in a different time zone to the one specified in `agency.txt`, then it can be overridden here. | +| `wheelchair_boarding` | Optional | A value of `1` indicates it is possible for passengers in wheelchairs to board or alight. A value of `2` means the stop is not wheelchair accessible, while `0` or an empty value means no information is available. If the stop has a parent station, then 0 or an empty value means to inherit from its parent. | + +### Sample Data + +The following extract is taken from the TriMet GTFS feed +(). + +| `stop_id` | `stop_code` | `stop_name` | `stop_lat` | `stop_lon` | `stop_url` | +| :-------- | :----------- | :------------------ | :---------- | :------------ | :---------------------------------------------------| +| `2` | `2` | `A Ave & Chandler` | `45.420595` | `-122.675676` |` ` | +| `3` | `3` | `A Ave & Second St` | `45.419386` | `-122.665341` |` ` | +| `4` | `4` | `A Ave & 10th St` | `45.420703` | `-122.675152` |` ` | +| `6` | `6` | `A Ave & 8th St` | `45.420217` | `-122.67307` |` ` | + +The following diagram shows how these points look if you plot them onto +a map. + +![Stops](images/stops-sample.png) + +In this extract, TriMet use the same value for stop IDs and stop codes. 
+This is useful, because it means the stop IDs are stable (that is, they +do not change between feed versions). This means that if you want to +save a particular stop (for instance, if a user wants to save a +"favorite stop") you can trust that saving the ID will get the job done. + +**Note:** This is not always the case though, which means you may have +to save additional information if you want to save a stop. For instance, +you may need to save the coordinates or the list of routes a stop serves +so you can find it again if the stop ID has changed in a future version +of the feed. + +### Stops & Stations + +Specifying an entry in this file as a *station* is typically used when +there are many stops located within a single physical entity, such as a +train station or bus depot. While many feeds do not offer this +information, some large train stations may have up to 20 or 30 +platforms. + +Knowing the platform for a specific trip is extremely useful, but if a +passenger wants to select a starting point for their trip, showing them +a list of platforms may be confusing. + +Passenger: + +**"I want to travel from *Central Station* to *Airport Station*."** + +Web site / App: + +**"Board at *Central Station platform 5*, disembark at *Airport Station platform 1*."** + +In this example, the passenger selects the parent station, but they are +presented with the specific stop so they know exactly where within the +station they need to embark or disembark. + +### Wheelchair Accessibility + +If you are showing wheelchair accessibility information, it is important +to differentiate between "no access" and "no information", as knowing a +stop is not accessible is as important as knowing it is. + +If a stop is marked as being wheelchair accessible, you must check that +trips that visit the stop are also accessible (using the +`wheelchair_accessible` field in `trips.txt`). If the value in +`trips.txt` is blank, `0` or `1` then it is safe to assume the +trip can be accessed. 
If the stop is accessible and the trip is not, +then passengers in wheelchairs cannot use the trip. + +### Stop Features + +One of the proposed changes to GTFS is the addition of a file called +`stop_features.txt`. This is used to define characteristics about +stops. The great thing about this file is that it allows you to indicate +to users when a stop has a ticket machine, bike storage, lighting, or an +electronic display with real-time information. + +TriMet is one of the few agencies including this file. The following is +a sample of this file. + +| `stop_id` | `feature_type` | +| :-------- | :------------- | +| `61` | `4110` | +| `61` | `2310` | +| `61` | `5200` | + +This data indicate that stop `61` (NE Alberta & 24th) has a *Printed +Schedule Display* (`4110`), a *Bike Rack* (`2310`) and a *Street +Light* (`5200`). + +For more information about this proposal and a list of values and their +meanings, refer to +. + diff --git a/gtfs-book/ch-04-routes.md b/gtfs-book/ch-04-routes.md new file mode 100644 index 0000000..2e9716e --- /dev/null +++ b/gtfs-book/ch-04-routes.md @@ -0,0 +1,82 @@ +## 4. Routes (routes.txt) + +*This file is ***required*** to be included in GTFS feeds.* + +A route is a group of trips that are displayed to riders as a single +service. + +| Field | Required? | Description | +| :----- | :-------- | :---------- | +| `route_id` | Required | An ID that uniquely identifies the route. | +| `agency_id` | Optional | The ID of the agency a route belongs to, as it appears in `agency.txt`. Only required if there are multiple agencies in the feed. | +| `route_short_name` | Required | A nickname or code to represent this service. If this is left empty then the `route_long_name` must be included. | +| `route_long_name` | Required | The route full name. If this is left empty then the `route_short_name` must be included. | +| `route_desc` | Optional | A description of the route, such as where and when the route operates. 
| +| `route_type` | Required | The type of transportation used on a route (such as bus, train or ferry). See below for more information. | +| `route_url` | Optional | A URL of a web page that describes this particular route. | +| `route_color` | Optional | If applicable, a route can have a color assigned to it. This is useful for systems that use colors to identify routes. This value is a six-character hexadecimal number (for example, `FF0000` is red). | +| `route_text_color` | Optional | For routes that specify the `route_color`, a corresponding text color should also be specified. | + +### Sample Data + +The following extract is taken from the TriMet GTFS feed +(). + +| `route_id` | `route_short_name` | `route_long_name` | `route_type` | +| :--------- | :----------------- | :--------------------------- | :----------- | +| `1` | `1` | `Vermont` | `3` | +| `4` | `4` | `Division / Fessenden` | `3` | +| `6` | `6` | `Martin Luther King Jr Blvd` | `3` | + +This sample shows three different bus routes for the greater Portland +area. The `route_type` value of `3` indicates they are buses. See +the next section for more information about route types in GTFS. + +There is no agency ID value in this feed, as TriMet is the only agency +represented in the feed. + +The other thing to note about this data is that TriMet use the same +value for both `route_id` and `route_short_name`. This is very +useful, because it means if you have a user that wants to save +information about a particular route you can trust the `route_id` +value. Unfortunately, this is not the case in all GTFS feeds. Sometimes, +the `route_id` value may change with every version of a feed (or at +least, semi-frequently). Additionally, some feeds may also have multiple +routes with the same `route_short_name`. This can present challenges +when trying to save user data. + +### Route Types + +To indicate a route's mode of transport, the `route_type` column is +used. 
+ +| Value | Description | +| :---- | :---------------- | +| `0` | Tram / Light Rail | +| `1` | Subway / Metro | +| `2` | Rail | +| `3` | Bus | +| `4` | Ferry | +| `5` | Cable Car | +| `6` | Gondola | +| `7` | Funicular | + +Agencies may interpret the meaning of these route types differently. For +instance, some agencies specify their subway service as rail (value of +`2` instead of `1`), while some specify their trains as light rail +(`0` instead of `2`). + +These differences between agencies occur mainly because of the vague +descriptions for each of these route types. If you use Google Transit to +find directions, you may notice route types referenced that are +different to those listed above. This is because Google Transit also +supports additional route types. You can read more about these +additional route types at +. + +Very few GTFS feeds made available to third-party developers actually +make use of these values, but it is useful to know in case you come +across one that does. For instance, Sydney Buses include their school +buses with a route type of `712`, while other buses in the feed have +route type `700`. + diff --git a/gtfs-book/ch-05-trips.md b/gtfs-book/ch-05-trips.md new file mode 100644 index 0000000..ee2b814 --- /dev/null +++ b/gtfs-book/ch-05-trips.md @@ -0,0 +1,222 @@ +## 5. Trips (trips.txt) + +*This file is ***required*** to be included in GTFS feeds.* + +The `trips.txt` file contains trips for each route. The specific stop +times are specified in `stop_times.txt`, and the days each trip runs +on are specified in `calendar.txt` and `calendar_dates.txt`. + +| Field | Required? | Description | +| :----- | :-------- | :---------- | +| `route_id` | Required | The ID of the route a trip belongs to as it appears in `routes.txt`. | +| `service_id` | Required | The ID of the service as it appears in `calendar.txt` or `calendar_dates.txt`, which identifies the dates on which a trip runs. 
| +| `trip_id` | Required | A unique identifier for a trip in this file. This value is referenced in `stop_times.txt` when specifying individual stop times. | +| `trip_headsign` | Optional | The text that appears to passengers as the destination or description of the trip. Mid-trip changes to the headsign can be specified in `stop_times.txt`. | +| `trip_short_name` | Optional | A short name or code to identify the particular trip, different to the route's short name. This may identify a particular train number or a route variation. | +| `direction_id` | Optional | Indicates the direction of travel for a trip, such as to differentiate between an inbound and an outbound trip. | +| `block_id` | Optional | A block is a series of trips conducted by the same vehicle. This ID is used to group 2 or more trips together. | +| `shape_id` | Optional | This value references a value from `shapes.txt` to define a shape for a trip. | +| `wheelchair_accessible` | Optional | `0` or blank indicates unknown, while `1` indicates the vehicle can accommodate at least one wheelchair passenger. A value of `2` indicates no wheelchairs can be accommodated. | + +### Sample Data + +Consider the following extract, taken from the `trips.txt` file of the +TriMet GTFS feed (). + +| `route_id` | `service_id` | `trip_id` | `direction_id` | `block_id` | `shape_id` | +| ---------- | ------------ | ----------- | -------------- | ---------- | ---------- | +| 1 | W.378 | 4282257 | 0 | 103 | 185327 | +| 1 | W.378 | 4282256 | 0 | 101 | 185327 | +| 1 | W.378 | 4282255 | 0 | 102 | 185327 | +| 1 | W.378 | 4282254 | 0 | 103 | 185327 | + +This data describes four individual trips for the "Vermont" bus route +(this was determined by looking up the `route_id` value in +`routes.txt`). While the values for `trip_id`, `block_id` and +`shape_id` are all integers in this particular instance, this is not a +requirement. Just like `service_id`, there may be non-numeric +characters. 
+ +As each of these trips is for the same route and runs in the same +direction (based on `direction_id`) they can all be represented by the +same shape. Note however that this is not always the case as some +agencies may start or finish trips for a single route at different +locations depending on the time of the day. If this were the case, then +the trip's shape would differ slightly (and therefore have a different +shape to represent it in `shapes.txt`). + +Although this example does not include `trip_headsign`, many feeds do +include this value. This is useful for indicating to a passenger where +the trip is headed. When the trip headsign is not provided in the feed, +you can determine the destination by using the final stop in a trip. + +**Tip:** If you are determining the destination based on the final stop, +you can either present the stop name to the user, or you can +reverse-geocode the stop's coordinates to determine its locality. + +### Blocks + +In the preceding example, each trip has a value for `block_id`. The +first and the last trips here both have a `block_id` value of `103`. +This indicates that the same physical vehicle completes both of these +trips. As each of these trips goes in the same direction, it is likely +that they start at the same location. + +This means there is probably another trip in the feed for the same block +that exists between the trips listed here. It would likely travel from +the finishing point of the first trip (`4282257`) to the starting +point of the other trip (`4282254`). If you dig a little deeper in the +feed you will find the trip shown in the following table. + +| `route_id` | `service_id` | `trip_id` | `direction_id` | `block_id` | `shape_id` | +| :--------- | :----------- | :-------- | :------------- | :--------- | :--------- | +| 1 | W.378 | 4282270 | 1 | 103 | 185330 | + +This is a trip traveling in the opposite direction for the same block.
+It has a different shape ID because it is traveling in the opposite +direction; a shape's points must advance in the same direction a trip's +stop times do. + +***Note:** You should perform some validation when grouping trips +together using block IDs. For instance, if trips share a block_id value +then they should also have the same service_id value. You should also +check that the times do not overlap; otherwise the same vehicle would be +unable to service both trips.* + +If you dig even further in this feed, there are actually seven different +trips all using block `103` for the **W.378** service period. This +roughly represents a full day's work for a single vehicle. + +For more discussion on blocks and how to utilize them effectively, refer +to *Working With Trip Blocks*. + +### Wheelchair Accessibility + +Similar to `stops.txt`, you can specify the wheelchair accessibility +of a specific trip using the `wheelchair_accessible` field. While many +feeds do not provide this information (often because vehicles in a fleet +can be changed at the last minute, so agencies do not want to guarantee +this information), your wheelchair-bound users will love you if you can +provide this information. + +As mentioned in the section on `stops.txt`, it is equally important to +tell a user that a specific vehicle cannot accommodate wheelchairs as to +when it can. Additionally, if the stops in a feed also have wheelchair +information, then both the stop and trip must be wheelchair accessible +for a passenger to be able to access a trip at the given stop. + +### Trip Direction + +One of the optional fields in `trips.txt` is `direction_id`, which +is used to indicate the general direction a vehicle is traveling. At +present the only possible values are `0` to represent "inbound" and +`1` to represent "outbound". 
There are no specific guidelines as to +what each value means, but the intention is that an inbound trip on a +particular route should be traveling in the opposite direction to an +outbound trip. + +Many GTFS feeds do not provide this information. In fact, there are a +handful of feeds that include two entries in `routes.txt` for each +route (one for each direction). + +One of the drawbacks of `direction_id` is that there are many routes +for which "inbound" or "outbound" do not actually mean anything. Many +cities have loop services that start and finish each trip at the same +location. Some cities have one or more loops that travel in both +directions (generally indicated by "clockwise loop" and +"counter-clockwise loop", or words to that effect). In these instances, +the `direction_id` can be used to determine which direction the route +is traveling. + +### Trip Short Name + +The `trip_short_name` field that appears in `trips.txt` is used to +provide a vehicle-specific code to a particular trip on a route. Based +on GTFS feeds that are currently in publication, it appears there are +two primary use-cases for this field: + +* Specifying a particular train number for all trains on a route +* Specifying a route "sub-code" for a route variant. + +### Specifying a Train Number + +For certain commuter rail systems, such as SEPTA in Philadelphia or MBTA +in Boston, each train has a specific number associated with it. This +number is particularly meaningful to passengers as trains on the same +route may have different stopping patterns or even different features +(for instance, only a certain train may have air-conditioning or +wheelchair access). + +Consider the following extract from SEPTA's rail feed +(). 
+ +| `route_id` | `service_id` | `trip_id` | `trip_headsign` | `block_id` | `trip_short_name` | +| ---------- | ------------ | ------------ | ------------------------ | ---------- | ----------------- | +| AIR | S5 | AIR_1404_V25 | Center City Philadelphia | 1404 | 1404 | +| AIR | S1 | AIR_402_V5 | Center City Philadelphia | 402 | 402 | +| AIR | S1 | AIR_404_V5 | Center City Philadelphia | 404 | 404 | +| AIR | S5 | AIR_406_V25 | Center City Philadelphia | 406 | 406 | + +In this data, there are four different trains all heading to the same +destination. The `trip_short_name` is a value that can safely be +presented to users as it has meaning to them. In this case, you could +present the first trip to passengers as: + +**"Train 1404 on the Airport line heading to Center City +Philadelphia."** + +In this particular feed, SEPTA use the same value for +`trip_short_name` and for `block_id`, because the train number +belongs to a specific train. This means after it completes the trip to +Center City Philadelphia it continues on. In this particular feed, the +following trip also exists: + +**"Train 1404 on the Warminster line heading to Glenside."** + +You can therefore think of the `trip_short_name` value as a +"user-facing" version of `block_id`. + +### Specifying a Route Sub-Code + +The other use-case for `trip_short_name` is for specifying a route +sub-code. For instance, consider an agency that has a route with short +name `100` that travels from stop `S1` to stop `S2`. At night the +agency only has a limited number of routes running, so they extend this +route to also visit stop `S3` (so it travels from `S1` to `S2` +then to `S3`). As it is a minor variation of the main path, the agency +calls this trip `100A`. + +The agency could either create a completely separate entry in +`routes.txt` (so they would have `100` and `100A`), or they can +override the handful of trips in the evening by setting the +`trip_short_name` to `100A`. 
The following table shows how this +example might be represented. + +| `route_id` | `trip_id` | `service_id` | `trip_short_name` | +| ---------- | --------- | ------------ | ----------------- | +| 100 | T1 | C1 | | +| 100 | T2 | C1 | | +| 100 | T3 | C1 | | +| 100 | T4 | C1 | 100A | + +In this example the `trip_short_name` does not need to be set for the +first three trips as they use the `route_short_name` value from +`routes.txt`. + +### Specifying Bicycle Permissions + +A common field that appears in many GTFS feeds is +`trip_bikes_allowed`, which is used to indicate whether or not +passengers are allowed to take bicycles on board. This is useful for +automated trip planning when bicycle options can be included in the +results. + +The way this field works is similar to the wheelchair information; `0` +or empty means no information provided; `1` means no bikes allowed; +while `2` means at least one bike can be accommodated. + +**Note:** Unfortunately, this value is backwards when you compare it to +wheelchair accessibility fields. For more discussion on this matter, +refer to the topic on the Google Group for GTFS Changes +(). + diff --git a/gtfs-book/ch-06-stop-times.md b/gtfs-book/ch-06-stop-times.md new file mode 100644 index 0000000..9eb0f93 --- /dev/null +++ b/gtfs-book/ch-06-stop-times.md @@ -0,0 +1,160 @@ +## 6. Stop Times (stop_times.txt) + +*This file is ***required*** to be included in GTFS feeds.* + +The `stop_times.txt` file specifies individual stop arrivals and +departures for each trip. This file is typically the largest in a GTFS +feed as it contains many records that correspond to each entry in +`trips.txt`. + +| Field | Required? | Description | +| :----- | :-------- | :---------- | +| `trip_id` | Required | References a trip from `trips.txt`. This ID is referenced for every stop in a trip. | +| `arrival_time` | Required | The arrival time in `HH:MM:SS` format. Can be left blank, except for at least the first and last stop time in a trip.
This value is typically the same as `departure_time`. | +| `departure_time` | Required | The departure time in `HH:MM:SS` format. Can be left blank, except for at least the first and last stop time in a trip. This value is typically the same as `arrival_time`. | +| `stop_id` | Required | References a single stop from `stops.txt`. | +| `stop_sequence` | Required | A unique number for a given trip to indicate the stopping order. Typically these values appear in order and increment by 1 for each stop time, but this is not always the case. | +| `stop_headsign` | Optional | This is text that appears to passengers at this stop to identify the trip's destination. It should only be used to override the `trip_headsign` value from `trips.txt`. | +| `pickup_type` | Optional | Indicates if passengers can be picked up at this stop. Sometimes a stop is drop-off only. | +| `drop_off_type` | Optional | Indicates if passengers can be dropped off at this stop. Sometimes a stop is pick-up only. | +| `shape_dist_traveled` | Optional | If a trip has an associated shape, this value indicates how far along that shape the vehicle has traveled when at this stop. Values in this file and `shapes.txt` must use the same unit. | + +### Sample Data + +Consider the following extract, taken from `stop_times.txt` in the TriMet +GTFS feed (). This represents the +first ten stops of a trip for bus route `1` ("Vermont") in Portland, as +covered in Sample Data for `trips.txt`.
+ +| `trip_id` | `arrival_time` | `departure_time` | `stop_id` | `stop_sequence` | `shape_dist_traveled` | +| :-------- | :------------- | :--------------- | :-------- | :-------------- | :-------------------- | +| 4282247 | 06:47:00 | 06:47:00 | 13170 | 1 | 0.0 | +| 4282247 | 06:48:18 | 06:48:18 | 7631 | 2 | 867.5 | +| 4282247 | 06:50:13 | 06:50:13 | 7625 | 3 | 2154.9 | +| 4282247 | 06:52:07 | 06:52:07 | 7612 | 4 | 3425.5 | +| 4282247 | 06:53:42 | 06:53:42 | 7616 | 5 | 4491.1 | +| 4282247 | 06:55:16 | 06:55:16 | 10491 | 6 | 5536.2 | +| 4282247 | 06:57:06 | 06:57:06 | 7588 | 7 | 6767.1 | +| 4282247 | 06:58:00 | 06:58:00 | 7591 | 8 | 7364.4 | +| 4282247 | 06:58:32 | 06:58:32 | 175 | 9 | 8618.7 | +| 4282247 | 06:58:50 | 06:58:50 | 198 | 10 | 9283.8 | + +If you were to plot the full trip on a map (including using its shape +file, as specified in `shapes.txt` and referenced in `trips.txt`), +it would look like the following diagram. The first stop is selected. + +![Stop Times](images/stop-times.png) + +### Arrival Time vs. Departure Time + +The first thing to notice about this data is that the values in +`arrival_time` and `departure_time` are the same. In reality, most +of the time these values are the same. The situation where these values +differ is typically when a vehicle is required to wait for a period of +time before departing. For instance: + +**"The train departs the domestic airport at 6:30am, arrives at the +international terminal at 6:35am. It waits for 10 minutes for passengers +who have just landed to board, then departs for the city at 6:45am."** + +While this is not typically something you need to worry about, be aware +that some feeds differ and the holdover time could be large. A person +rushing to a train wants to know the time it departs, while a husband +waiting at a stop to meet his wife wants to know what time it arrives. 
+ +**Note:** In a situation where the difference between the arrival time +and departure is small, you may be better off always displaying the +earlier time to the user. The driver may view a one or two minute +holdover as an opportunity to keep on time, whereas a ten or fifteen +minute holdover is unlikely to be ignored as doing so would +significantly alter the schedule. + +### Scheduling Past Midnight + +One of the most important concepts to understand about +`stop_times.txt` is that times later than midnight can be specified. +For example, if a trip starts at 11:45 PM and takes an hour to complete, +its finishing time is 12:45 AM the next day. + +The `departure_time` value for the first stop is `23:45:00`, +while the `arrival_time` value for the final stop is `24:45:00`. +If you were to specify the final arrival time as `00:45:00`, it +would be referencing 12:45 AM prior to the trip's starting time. + +While you could use the `stop_sequence` to determine which day the +trip fell on, it would be impossible to do a quick search purely based +on the given final stop. + +A trip may start and finish after midnight without being considered as +part of the next day's service. Many transit systems shut down +overnight, but may have a few services that run after midnight until +about 1 AM or 2 AM. It is logical to group them all together with the +same service as earlier trips, which this functionality allows you to +do. + +However, this has implications when searching for trips. For instance, +if you want to find all trips between 12:00 AM and 1:00 AM on 30 January +2014, then you need to search: + +* Between `00:00:00` and `01:00:00` for trips with service on `20140130` +* Between `24:00:00` and `25:00:00` for trips with service on `20140129` + +In *Searching for Trips* you can see how to apply this to your +trip searches. + +### Time Points + +Most GTFS feeds provide arrival/departure times for every single stop.
+In reality, most agencies do not have known times (or at least, they do +not publish times) for many of their stops. + +Typically, for routes that describe trains, subways or ferries that make +relatively few stops in a trip, all stops have a specified time. +However, often for bus routes that may make many stops in a single trip, +generally only the main stops have times shown on printed schedules. + +Typically, the bus drivers are required to meet these time points; if +they are ahead of schedule they might wait at a time point until the +scheduled time; if they are running late they might try to catch up in +order to adhere to the specified time point. + +This means that the intermediate points are likely estimates that have +been interpolated based on the amount of time between time points. If +there are multiple stops in-between the time points then the distance +between stops may also be used to calculate the estimate. + +In actual fact, GTFS feeds do not have to specify times for all stops. +The data in the following table is perfectly valid for a trip. + +| `trip_id` | `arrival_time` | `stop_id` | `stop_sequence` | `shape_dist_traveled` | +| :-------- | :------------- | :-------- | :-------------- | :-------------------- | +| T1 | 10:00:00 | S1 | 1 | 0 | +| T1 | | S2 | 2 | 1500 | +| T1 | | S3 | 3 | 3000 | +| T1 | 10:12:00 | S4 | 4 | 6000 | + +Based on this data, without taking into account the distance traveled, +you may estimate that the second stop arrives at 10:04 AM while the +third stop arrives at 10:08. + +If you consider the distance traveled, you might conclude the second +stop arrives at 10:03 AM while the third stop arrives at 10:06 AM. + +Some agencies include an additional column in `stop_times.txt` called +`timepoint`. This is used when they specify the times for all stops +but also want to indicate if only certain stops are guaranteed times. + +The following table shows how this would look using the previous data as +its basis. 
+ +| `trip_id` | `arrival_time` | `stop_id` | `stop_sequence` | `shape_dist_traveled` | `timepoint` | +| :--------- | :------------- | :-------- | :-------------- | :-------------------- | :---------- | +| T1 | 10:00:00 | S1 | 1 | 0 | 1 | +| T1 | 10:03:00 | S2 | 2 | 1500 | 0 | +| T1 | 10:06:00 | S3 | 3 | 3000 | 0 | +| T1 | 10:12:00 | S4 | 4 | 6000 | 1 | + +This can be especially useful if you want to highlight these time points +so as to represent the printed schedules accurately, or even if you are +a transit agency just using the data for internal reporting. + diff --git a/gtfs-book/ch-07-calendar.md b/gtfs-book/ch-07-calendar.md new file mode 100644 index 0000000..aebf131 --- /dev/null +++ b/gtfs-book/ch-07-calendar.md @@ -0,0 +1,138 @@ +## 7. Trip Schedules (calendar.txt & calendar_dates.txt) + +*Each of these files is ***optional*** in a GTFS feed, but at least one +of them is ***required***.* + +The `calendar.txt` file is used to indicate the range of dates on +which trips are running. It works by including a start date and a finish +date (typically a range of 3-6 months), then a marker for each day of +the week on which it operates. If there are single-day scheduling +changes that occur during this period, then the `calendar_dates.txt` +file can be used to override the schedule for each of these days. + +The following table shows the specification for `calendar.txt`. + +| Field | Required? | Description | +| :----- | :-------- | :---------- | +| `service_id` | Required | A unique ID for a single service. This value is referenced by trips in `trips.txt`. | +| `start_date` | Required | This indicates the start date for a given service, in `YYYYMMDD` format. | +| `end_date` | Required | This indicates the end date for a given service, in `YYYYMMDD` format. | +| `monday` | Required | Contains `1` if trips run on Mondays between the start and end dates, `0` or empty if not.
| +| `tuesday` | Required | Contains `1` if trips run on Tuesdays between the start and end dates, `0` or empty if not. | +| `wednesday` | Required | Contains `1` if trips run on Wednesdays between the start and end dates, `0` or empty if not. | +| `thursday` | Required | Contains `1` if trips run on Thursdays between the start and end dates, `0` or empty if not. | +| `friday` | Required | Contains `1` if trips run on Fridays between the start and end dates, `0` or empty if not. | +| `saturday` | Required | Contains `1` if trips run on Saturdays between the start and end dates, `0` or empty if not. | +| `sunday` | Required | Contains `1` if trips run on Sundays between the start and end dates, `0` or empty if not. | + +As mentioned above, the `calendar_dates.txt` file is used to define +exceptions to entries in `calendar.txt`. For instance, if a 3-month +service is specified in `calendar.txt` and a holiday lies on a Monday +during this period, then you can use calendar_dates.txt to override this +single date. + +If the weekend schedule were used for a holiday, then you would add a +record to remove the regular schedule for the holiday date, and another +record to add the weekend schedule for the holiday date. + +Some feeds choose only to include `calendar_dates.txt` and not +`calendar.txt`, in which case there is an "add service" record for +every service and every date in this file. + +The following table shows the specification for `calendar_dates.txt`. + +| Field | Required? | Description | +| :----- | :-------- | :---------- | +| `service_id` | Required | The service ID that an exception is being defined for. This is referenced in both `calendar.txt` and in `trips.txt`. Unlike `calendar.txt`, it is possible for a `service_id` value to appear multiple times in this file. | +| `date` | Required | The date for which the exception is occurring, in `YYYYMMDD` format. 
| +| `exception_type` | Required | This indicates whether the exception is denoting an added service (`1`) or a removed service (`2`). | + +### Sample Data + +The following is an extract from the `calendar.txt` file in Adelaide +Metro's GTFS feed (). This +extract includes schedules from the start of 2014 until the end of March +2014. + +| `service_id` | `monday` | `tuesday` | `wednesday` | `thursday` | `friday` | `saturday` | `sunday` | `start_date` | `end_date` | +| :----------- | :------- | :-------- | :---------- | :--------- | :------- | :--------- | :------- | :----------- | :--------- | +| 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 20140102 | 20140331 | +| 11 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 20140102 | 20140331 | +| 12 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 20140102 | 20140331 | + +Any trip in the corresponding `trips.txt` file with a `service_id` +value of `1` runs from Monday to Friday. Trips with a `service_id` +of `11` run only on Saturday, while those with `12` run only on +Sunday. + +Now consider an extract from `calendar_dates.txt` from the same feed, +as shown in the following table. + +| `service_id` | `date` | `exception_type` | +| ------------ | -------- | ---------------- | +| 1 | 20140127 | 2 | +| 1 | 20140310 | 2 | +| 12 | 20140127 | 1 | +| 12 | 20140310 | 1 | + +The first two rows mean that on January 27 and March 10 trips with +`service_id` of `1` are not running. The final two rows mean that on +those same dates trips with `service_id` of `12` are running. This +has the following meaning: + +**"On 27 January and 10 March, use the Sunday timetable instead of the +Monday-Friday timetable."** + +In Adelaide, these two dates are holidays (Australia Day and Labour +Day). It is Adelaide Metro's policy to run their Sunday timetable on +public holidays, which is reflected by the above records in their +`calendar_dates.txt` file. 
+ +### Structuring Services + +The case described above is the ideal case for specifying services in a +GTFS feed (dates primarily specified in `calendar.txt` with a handful +of exceptions in `calendar_dates.txt`). + +Be aware that there are two other major ways that services are specified +in feeds. + +1. Using only `calendar_dates.txt` and expressly including every + single date within the service range. Each of these is included as + "service added" (an `exception_type` value of `1`). The + following table shows how this might look. + +| `service_id` | `date` | `exception_type` | +| :----------- | :------- | :--------------- | +| 1 | 20140102 | 1 | +| 1 | 20140103 | 1 | +| 11 | 20140104 | 1 | +| 12 | 20140105 | 1 | + +2. Not using `calendar_dates.txt`, but creating many records in + `calendar.txt` instead to span various dates. The following table + shows how you can represent Monday-Friday from the sample data in + this fashion. + +| `service_id` | `monday` | `tuesday` | `wednesday` | `thursday` | `friday` | `saturday` | `sunday` | `start_date` | `end_date` | +| :----------- | :------- | :-------- | :---------- | :--------- | :------- | :--------- | :------- | :----------- | :--------- | +| 1a | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 20140102 | 20140126 | +| holiday1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 20140127 | 20140127 | +| 1b | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 20140128 | 20140309 | +| holiday2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 20140310 | 20140310 | +| 1c | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 20140311 | 20140331 | + +In this example, each holiday has its own row in `calendar.txt` that +runs for a single day only. + +Refer to *Finding Service IDs* to see how to determine +services that are running for a given day. + +### Service Name + +There are a number of feeds that specify a column in `calendar.txt` +called `service_name`. This is used to give a descriptive name to each +service. 
For example, the Sedona Roadrunner in Arizona +() has services called +"Weekday Service", "Weekend Service" and "New Year's Eve Service". + diff --git a/gtfs-book/ch-08-fares.md b/gtfs-book/ch-08-fares.md new file mode 100644 index 0000000..88bcbea --- /dev/null +++ b/gtfs-book/ch-08-fares.md @@ -0,0 +1,115 @@ +## 8. Fare Definitions (fare_attributes.txt & fare_rules.txt) + +*These files are ***optional*** in a GTFS feed, but any rules specified +must reference a fare attributes record.* + +These two files define the types of fares that exist in a system, +including their price and transfer information. The attributes of a +particular fare exist in `fare_attributes.txt`, which has the +following columns. + +| Field | Required? | Description | +| :----- | :-------- | :---------- | +| `fare_id` | Required | A value that uniquely identifies a fare listed in this file. It must only appear once in this file. | +| `price` | Required | This field specifies the cost of the fare. For instance, if a trip costs $2 USD, then this value should be either `2.00` or `2`, and the `currency_type` should be `USD`. | +| `currency_type` | Required | This is the currency code that `price` is specified in, such as USD for United States Dollar. | +| `payment_method` | Required | This indicates when the fare is to be paid. `0` means it can be paid on board, while `1` means it must be paid before boarding. | +| `transfers` | Required | The number of transfers that may occur. This must be either empty (unlimited transfers) or the number of transfers permitted. | +| `transfer_duration` | Optional | This is the number of seconds a transfer is valid for. | + +The following table shows the specification for `fare_rules.txt`, +which defines the rules used to apply a fare to a particular trip. + +| Field | Required? | Description | +| :----- | :-------- | :---------- | +| `fare_id` | Required | This is the ID of the fare that a rule applies to as it appears in `fare_attributes.txt`.
| +| `route_id` | Optional | The ID of a route as it appears in `routes.txt` for which this rule applies. If there are several routes with the same fare attributes, there may be a row in `fare_rules.txt` for each route. | +| `origin_id` | Optional | This value corresponds to a `zone_id` value from `stops.txt`. If specified, this means a trip must begin in the given zone in order to qualify for this fare. | +| `destination_id` | Optional | This value corresponds to a `zone_id` value from `stops.txt`. If specified, this means a trip must end in the given zone in order to qualify for this fare. | +| `contains_id` | Optional | This value corresponds to a `zone_id` value from `stops.txt`. If specified, this means a trip must pass through every `contains_id` zone for the given fare (in other words, several rules may need to be checked). | + +Note that aside from `fare_id`, all fields are optional in this file. +This means some very complex rules can be made (especially when +transfers come into the calculation). The following URL has discussion +about different rules and some complex fare examples: + + + +Refer to *Calculating Fares* for discussion about the +algorithm for calculating fares for trips both with and without +transfers. + +### Sample Data + +The following data is taken from the TriMet GTFS feed +(). Firstly, the data from the +`fare_attributes.txt` file. + +| `fare_id` | `price` | `currency_type` | `payment_method` | `transfers` | `transfer_duration` | +| :-------- | :------ | :--------- | :--------------- | :---------- | :-------------------| +| B | 2.5 | USD | 0 | | 7200 | +| R | 2.5 | USD | 1 | | 7200 | +| BR | 2.5 | USD | 0 | | 7200 | +| RB | 2.5 | USD | 1 | | 7200 | +| SC | 1 | USD | 1 | 0 | | +| AT | 4 | USD | 1 | 0 | | +| VT | 0 | USD | 1 | 0 | | + +The data from the `fare_rules.txt` file is shown in the following +table.
+ +| `fare_id` | `route_id` | `origin_id` | `destination_id` | `contains_id` | +| :-------- | :--------- | :---------- | :--------------- | :------------ | +| B | | B | | B | +| R | | R | | R | +| BR | | B | | B | +| BR | | B | | R | +| RB | | R | | B | +| RB | | R | | R | +| SC | 193 | | | | +| SC | 194 | | | | +| AT | 208 | | | | +| VT | 250 | | | | + +In this sample data, TriMet have named some of their fares the same as +the zones specified in `stops.txt`. In this particular feed, bus stops +have a `zone_id` of `B`, while rail stops have `R`. + +The fares in this file are as follows: + +* Fare `B`. If you start at a bus stop (`zone_id` value of `B`), + you can buy your ticket on board (`payment_method` of `0`). You + may transfer an unlimited number of times (empty `transfers` value) for up to 2 hours + (`transfer_duration` of `7200`). The cost is $2.50 USD. +* Fare `R`. If you start at a rail stop, you must pre-purchase your + ticket. You may transfer an unlimited number of times to other rail + services for up to 2 hours. The cost is $2.50 USD. + +The `BR` fare describes a trip that begins on a bus then transfers to +a rail service (while `RB` is the opposite). This fare is not +matched if the passenger does not travel on both, as all `contains_id` +values must be matched in order to apply a fare. + +The other fares (`SC`, `AT` and `VT`) all apply to their +respective `route_id` values, regardless of start and finish stops. +Tickets must be pre-purchased, and transfers are not allowed. The `VT` +fare (which corresponds to TriMet's Vintage Trolley) is free to ride +since it has a price of `0`. + +### Assigning Fares to Agencies + +One of the extensions available to `fare_attributes.txt` is to include +an `agency_id` column. This is to limit a specific fare to only routes +from the specified agency, in the case where a feed has multiple +agencies.
+ +This is useful because there may be two agencies in a feed that define +fares with no specific rules (in other words, the fare applies to all +trips). If the price differs, then GTFS dictates that the cheapest fare +is always applied. Using `agency_id` means these fares can be +differentiated accordingly. + +For more information about this extension, refer to the Google Transit +GTFS Extensions page at +. + diff --git a/gtfs-book/ch-09-shapes.md b/gtfs-book/ch-09-shapes.md new file mode 100644 index 0000000..bc05636 --- /dev/null +++ b/gtfs-book/ch-09-shapes.md @@ -0,0 +1,76 @@ +## 9. Trip Shapes (shapes.txt) + +*This file is ***optional*** in a GTFS feed.* + +Each trip in `trips.txt` can have a shape associated with it. The +shapes.txt file defines the points that make up an individual shape in +order to plot a trip on a map. Two or more records in `shapes.txt` +with the same `shape_id` value define a shape. + +The amount of data stored in this file can be quite large. In +*Optimizing Shapes* there are some strategies to efficiently +reduce the amount of shape data. + +| Field | Required? | Description | +| :----- | :-------- | :---------- | +| `shape_id` | Required | An ID to uniquely identify a shape. Every point for a shape contains the same value. | +| `shape_pt_lat` | Required | The latitude for a given point in the range of `-90` to `90`. | +| `shape_pt_lon` | Required | The longitude for a given point in the range of `-180` to `180`. | +| `shape_pt_sequence` | Required | A non-negative number that defines the ordering for points in a shape. A value must not be repeated within a single shape. | +| `shape_dist_traveled` | Optional | This value represents how far along a shape a particular point exists. This is a distance in a unit such as feet or kilometers. This unit must be the same as that used in `stop_times.txt`. | + +### Sample Data + +The following table shows a portion of a shape from the TriMet GTFS +feed. 
It is a portion of the shape that corresponds to the sample data +in the `stop_times.txt` section. + +| `shape_id` | `shape_pt_lat` | `shape_pt_lon` | `shape_pt_sequence` | `shape_dist_traveled` | +| :--------- | :------------- | :------------- | :------------------ | :-------------------- | +| 185328 | 45.52291 | -122.677372 | 1 | 0.0 | +| 185328 | 45.522921 | -122.67737 | 2 | 3.7 | +| 185328 | 45.522991 | -122.677432 | 3 | 34.0 | +| 185328 | 45.522992 | -122.677246 | 4 | 81.5 | +| 185328 | 45.523002 | -122.676567 | 5 | 255.7 | +| 185328 | 45.523004 | -122.676486 | 6 | 276.4 | +| 185328 | 45.523007 | -122.676386 | 7 | 302.0 | +| 185328 | 45.523024 | -122.675386 | 8 | 558.4 | +| 185328 | 45.522962 | -122.67538 | 9 | 581.0 | + +In this sample data, the `shape_dist_traveled` is listed in feet. +There is no way to specify in a GTFS feed which units are used for this +column -- it could be feet, miles, meters, kilometers. In actual fact, +it does not really matter, just as long as the units are the same as in +`stop_times.txt`. + +If you need to present a distance to your users (such as how far you +need to travel on a bus), you can calculate it instead by adding up the +distance between each point and formatting it based on the user's +locale settings. + +### Point Sequences + +In most GTFS feeds the `shape_pt_sequence` value starts at 1 and +increments by 1 for every subsequent point. Additionally, points are +typically listed in order of their sequence. + +You should not rely on these two statements though, as this is not a +requirement of GTFS. Many transit agencies have automated systems that +export their GTFS from a separate system, which can sometimes result in +an unpredictable output format. + +For instance, a shape that has points listed with the sequences `1`, `2`, +`9`, `18`, `7`, `3` is perfectly valid.
+ +### Distance Travelled + +The `shape_dist_traveled` column is used so you can programmatically +determine how much of a shape to draw when showing a map to users of +your web site or app. If you use techniques in *Optimizing Shapes* +to reduce the file size of shape data, then it becomes difficult to +use this value. + +Alternatively, you can calculate portions of shapes by determining which +point in a shape travels closest to the start and finish points of a +trip. + diff --git a/gtfs-book/ch-10-frequencies.md b/gtfs-book/ch-10-frequencies.md new file mode 100644 index 0000000..45021ab --- /dev/null +++ b/gtfs-book/ch-10-frequencies.md @@ -0,0 +1,98 @@ +## 10. Repeating Trips (frequencies.txt) + +*This file is ***optional*** in a GTFS feed.* + +In some cases a route may repeat a particular stopping pattern every few +minutes (picture a subway line that runs every 5 minutes). Rather than +including entries in `trips.txt` and `stop_times.txt` for every +single occurrence, you can include the trip once then define rules for +it to repeat for a period of time. + +Having a trip repeat only works in the case where the timing between +stops remains consistent for all stops. Using `frequencies.txt`, you +use the relative times between stops alongside a calculated starting +time for the trip in order to determine the specific stop times. + +| Field | Required? | Description | +| :----- | :-------- | :---------- | +| `trip_id` | Required | The ID of the trip as it appears in `trips.txt` that is being repeated. A single trip can appear multiple times for different time ranges. | +| `start_time` | Required | The time at which a given trip starts repeating, in `HH:MM:SS` format. | +| `end_time` | Required | The time at which a given trip stops repeating, in `HH:MM:SS` format. | +| `headway_secs` | Required | The time in seconds between departures from a given stop during the time range. 
| +| `exact_times` | Optional | Whether or not repeating trips should be exactly scheduled. See below for discussion. | + +### Sample Data + +The following sample data is taken from Société de transport de Montréal +(STM) in Montreal +(). + +| `trip_id` | `start_time` | `end_time` | `headway_secs` | +| :----------------------- | :----------- | :--------- | :------------- | +| 13S_13S_F1_1_2_0.26528 | 05:30:00 | 07:25:30 | 630 | +| 13S_13S_F1_1_6_0.34167 | 07:25:30 | 08:40:10 | 560 | +| 13S_13S_F1_1_10_0.42500 | 08:40:10 | 12:19:00 | 505 | +| 13S_13S_F1_1_7_0.58750 | 12:19:00 | 15:00:00 | 460 | +| 13S_13S_F1_1_11_0.66875 | 15:00:00 | 18:23:00 | 420 | +| 13S_13S_F1_1_5_0.78889 | 18:23:00 | 21:36:35 | 505 | + +Each of the trips listed here have corresponding entries in +`trips.txt` and `stop_times.txt` (more on that shortly). This data +can be interpreted as follows. + +* The first trip runs every 10m 30s from 5:30am until 7:25am. +* The second trip runs every 9m 20s from 7:25am until 8:40am, and so on. + +The following table shows some of the stop times for the first trip +(`departure_time` is omitted here for brevity, since it is identical +to `arrival_time`). + +| `trip_id` | `stop_id` | `arrival_time` | `stop_sequence` | +| :---------------------- | :-------- | :------------- | :-------------- | +| 13S_13S_F1_1_2_0.26528 | 18 | 06:22:00 | 1 | +| 13S_13S_F1_1_2_0.26528 | 19 | 06:22:59 | 2 | +| 13S_13S_F1_1_2_0.26528 | 20 | 06:24:00 | 3 | +| 13S_13S_F1_1_2_0.26528 | 21 | 06:26:00 | 4 | + +As this trip runs to the specified frequency, the specific times do not +matter. Instead, the differences are used. For the above stop times, +there is a 59 second gap between the first and second time, a 61 second +gap between the second and third, and a 120 second gap between the third +and fourth. + +The stop times for the first frequency record (10.5 minutes apart) can +be calculated as follows. 
+ +* `05:30:00`, `05:30:59`, `05:32:00`, `05:34:00` +* `05:40:30`, `05:41:29`, `05:42:30`, `05:44:30` +* `05:51:00`, `05:51:59`, `05:53:00`, `05:55:00` +* ... +* `07:25:30`, `07:26:29`, `07:27:30`, `07:29:30` + +### Specifying Exact Times + +In the file definition at the beginning of this chapter there is an +optional field called `exact_times`. It may not be immediately clear +what this field means, so to explain it better, consider the frequency +definitions in the following table. + +| `trip_id` | `start_time` | `end_time` | `headway_secs` | `exact_times` | +| :-------- | :----------- | :--------- | :------------- | :------------ | +| T1 | 09:00:00 | 10:00:00 | 300 | 0 | +| T2 | 09:00:00 | 10:00:00 | 300 | 1 | + +These two frequencies are the same, with only the `exact_times` value +different. The first (`T1`) should be presented in a manner such as: + +**"Between 9 AM and 10 AM this trip departs every 5 minutes."** + +The second trip (`T2`) should be presented as follows: + +**"This trip departs at 9 AM, 9:05 AM, 9:10 AM, ..."** + +While ultimately the meaning is the same, this difference is used in +order to allow agencies to represent their schedules more accurately. +Often, schedules that convey to passengers that they will not have to +wait more than five minutes do so without having to explicitly list +every departure time. + diff --git a/gtfs-book/ch-11-stop-transfers.md b/gtfs-book/ch-11-stop-transfers.md new file mode 100644 index 0000000..6a43674 --- /dev/null +++ b/gtfs-book/ch-11-stop-transfers.md @@ -0,0 +1,65 @@ +## 11. Stop Transfers (transfers.txt) + +*This file is ***optional*** in a GTFS feed.* + +To define how passengers can transfer between routes at specific stops +feed providers can include `transfers.txt`. This does not mean +passengers cannot transfer elsewhere, but it does indicate if a transfer +is not possible between certain stops, or a minimum time required if +transfer is possible. + +| Field | Required? 
| Description | +| :----- | :-------- | :---------- | +| `from_stop_id` | Required | The ID of the stop as it appears in `stops.txt` where the connection begins. If this references a station, then this rule applies to all stops within the station. | +| `to_stop_id` | Required | The ID of the stop as it appears in `stops.txt` where the connection between trips ends. If this references a station, then this rule applies to all stops within the station. | +| `transfer_type` | Required | `0` or blank means the recommended transfer point, `1` means the secondary vehicle will wait for the first, `2` means a minimum amount of time is required, `3` means transfer is not possible. | +| `min_transfer_time` | Optional | If the `transfer_type` value is `2` then this value must be specified. It indicates the number of seconds required to transfer between the given stops. | + +It is also possible that records in this file are specified for +ticketing reasons. For instance, some train stations are set up so that +passengers can transfer between routes without needing to validate their +ticket again or buy a transfer. Other stations that are shared between +those same routes might not have this open transfer area, thereby +requiring you to exit one route fully before buying another ticket to +access the second. + +### Sample Data + +The following table shows some sample transfer rules from TriMet in +Portland's GTFS feed (). + +| `from_stop_id` | `to_stop_id` | `transfer_type` | `min_transfer_time` | +| :------------- | :----------- | :-------------- | :------------------ | +| 7807 | 5020 | 0 | | +| 7807 | 7634 | 0 | | +| 7807 | 7640 | 0 | | + +These rules indicate that if you are transferring from a route that +visits stop `7807` to any route that visits the other stops (`5020`, +`7634` or `7640`), then this is the ideal place to do it.
+ +In other words, if there are other locations along the first route where +you could transfer to the second route, then those stops should not be +used. These rules say this is the best place to transfer. + +Consider the transfer rule in the following table, taken from the New +York City Subway GTFS feed (). + +| `from_stop_id` | `to_stop_id` | `transfer_type` | `min_transfer_time` | +| :------------- | :----------- | :-------------- | :------------------ | +| 121 | 121 | 2 | 180 | + +In this data, the MTA specifies how long it takes to transfer to +different platforms within the same station. The stop with ID 121 refers +to the 86th St station (as specified in `stops.txt`). It has a +`location_type` of `1` and two stops within it (`121N` and +`121S`). The above transfer rule says that if you need to transfer +from `121N` to `121S` (or vice-versa) then a minimum time of 3 +minutes (180 seconds) must be allocated. + +If you were to calculate the time taken to transfer using the +coordinates of each of these platforms, it would only take a few seconds +as they are physically close to each other. In reality though, you must +exit one platform then walk around and enter the other platform (often +having to use stairs). + diff --git a/gtfs-book/ch-12-feed-information.md b/gtfs-book/ch-12-feed-information.md new file mode 100644 index 0000000..ffad2ab --- /dev/null +++ b/gtfs-book/ch-12-feed-information.md @@ -0,0 +1,36 @@ +## 12. Feed Information (feed_info.txt) + +*This file is ***optional*** in a GTFS feed.* + +Feed providers can include additional information about a feed using +`feed_info.txt`. It should only ever have a single row (other than the +CSV header row). + +| Field | Required? | Description | +| :----- | :-------- | :---------- | +| `feed_publisher_name` | Required | The name of the organization that publishes the feed. This may or may not be the same as any agency in `agency.txt`. 
| +| `feed_publisher_url` | Required | The URL of the feed publisher's web site. | +| `feed_lang` | Required | This specifies the language used in the feed. If an agency also has a language specified, then the agency's value should override this value. | +| `feed_start_date` | Optional | This value is a date in `YYYYMMDD` format that asserts the data in this feed is valid from this date. If specified, it typically matches up with the earliest date in `calendar.txt` or `calendar_dates.txt`, but if it is earlier, this is explicitly saying there are no services running between this date and the earliest service date. | +| `feed_end_date` | Optional | This value is a date in `YYYYMMDD` format that asserts the data in this feed is valid until this date. If specified, it typically matches up with the latest date in `calendar.txt` or `calendar_dates.txt`, but if it is later, this is explicitly saying there are no services running between the latest service date and this date. | +| `feed_version` | Optional | A string that indicates the version of this feed. This can be useful to let feed publishers know whether the latest version of their feed has been incorporated. | + +### Sample Data + +The following sample data is taken from the GTFS feed of TriMet in +Portland (). + +| `feed_publisher_name` | `feed_publisher_url` | `feed_lang` | `feed_start_date` | `feed_end_date` | `feed_version` | +| :-------------------- | :-------------------------------------- | :---------- | :---------------- | :-------------- | :---------------- | +| TriMet | [http://trimet.org](http://trimet.org/) | en | | | 20140121-20140421 | + +In this example, TriMet do not include the start or end dates, meaning +you should derive the dates this feed is active for by the dates in +`calendar_dates.txt` (this particular feed does not have a +`calendar.txt` file). + +TriMet use date stamps to indicate the feed version.
This feed was +published on 21 January 2014 and includes data up until 21 April 2014, +so it appears they use the first/last dates as a way to specify their +version. Each agency has its own method. + diff --git a/gtfs-book/ch-13-importing-to-sql.md b/gtfs-book/ch-13-importing-to-sql.md new file mode 100644 index 0000000..e96517c --- /dev/null +++ b/gtfs-book/ch-13-importing-to-sql.md @@ -0,0 +1,125 @@ +## 13. Importing a GTFS Feed to SQL + +One of the great things about GTFS is that it is already in a format +conducive to being used in an SQL database. The presence of various IDs +in each of the different files makes it easy to join the tables in order +to extract the data you require. + +To try this yourself, download `GtfsToSql` +(). This is a Java +command-line application that imports a GTFS feed to an SQLite database. +This application also supports PostgreSQL, but the examples used here +are for SQLite. + +The pre-compiled `GtfsToSql` Java archive can be downloaded from its +GitHub repository at +. + +To use `GtfsToSql`, all you need is an extracted GTFS feed. The +following instructions demonstrate how to import the TriMet feed +that has been referenced throughout this book. + +Firstly, download and extract the feed. The following commands use curl +to download the file, then unzip to extract the file to a sub-directory +called `trimet`. + +``` +$ curl http://developer.trimet.org/schedule/gtfs.zip > gtfs.zip + +$ unzip gtfs.zip -d trimet/ +``` + +To create an SQLite database from this feed, the following command can +be used. + +``` +$ java -jar GtfsToSql.jar -s jdbc:sqlite:./db.sqlite -g ./trimet +``` + +This may take a minute or two to complete (you will see progress as it +imports the feed and then creates indexes), and at the end you will have +a GTFS database in a file called `db.sqlite`. You can then query this +database with the command-line `sqlite3` tool, as shown in the +following example.
+ +``` +$ sqlite3 db.sqlite +sqlite> SELECT * FROM agency; +|TriMet|http://trimet.org|America/Los_Angeles|en|503-238-7433 + +sqlite> SELECT * FROM routes WHERE route_type = 0; +90|||MAX Red Line||0|http://trimet.org/schedules/r090.htm|| +100|||MAX Blue Line||0|http://trimet.org/schedules/r100.htm|| +190|||MAX Yellow Line||0|http://trimet.org/schedules/r190.htm|| +193||Portland Streetcar|NS Line||0|http://trimet.org/schedules/r193.htm|| +194||Portland Streetcar|CL Line||0|http://trimet.org/schedules/r194.htm|| +200|||MAX Green Line||0|http://trimet.org/schedules/r200.htm|| +250|||Vintage Trolley||0|http://trimet.org/schedules/r250.htm|| +``` + +The first query above finds all agencies stored in the database, while +the second finds all routes marked as Light Rail (`route_type` of +`0`). + +**Note: In the following chapters there are more SQL examples. All of +these examples are geared towards running on an SQLite database that has +been created in this manner.** + +All tables in this database match up with the corresponding GTFS +filename (so for `agency.txt`, the table name is `agency`, while for +`stop_times.txt` the table name is `stop_times`). The columns in SQL +have the same name as the value in the corresponding GTFS file. + +***Note:** All data imported using this tool is stored as text in the +database. This means you may need to be careful when querying integer +data. For example, ordering stop times by stop_sequence may not produce +expected results (for instance, 29 as a string comes before 3). Although +it is a performance hit, you can change this behavior by casting the +value to integer, such as: ORDER BY stop_sequence + 0. The reason +`GtfsToSql` works in this way is because it is intended as a lightweight +tool to be able to quickly query GTFS data. 
I recommend rolling your own +importer to treat data exactly as you need it, especially in conjunction +with some of the optimization techniques recommended later in this book.* + +### File Encodings + +The GTFS specification does not indicate whether files should be encoded +using UTF-8, ISO-8859-1 or otherwise. Since a GTFS feed is not +necessarily in English, you must be willing to handle an extended +character set. + +The GtfsToSql tool introduced above automatically detects the encoding +of each file using the juniversalchardet Java library +(). + +I recommend you take some time looking at the source code of GtfsToSql +to further understand this so you are aware of handling encodings +correctly if you write your own parser. + +### Optimizing GTFS Feeds + +If you are creating a database that is to be distributed onto a mobile +device such as an iPhone or Android phone, then disk space and +computational power is at a premium. Even if you are setting up a +database to be queried on a server only, then making the database +perform as quickly as possible is still important. + +In the following chapters are techniques for optimizing GTFS feeds. +There are many techniques that can be applied to improve the performance +of GTFS, such as: + +* Using integer identifiers rather than string identifiers (for route + IDs, trip IDs, stop IDs, etc.) and creating appropriate indexes +* Removing redundant shape points and encoding shapes +* Deleting unused data +* Reusing repeating trip patterns. + +Changing the data to use integer IDs makes the greatest improvement to +performance, but the other techniques also help significantly. + +Depending on your needs, there are other optimizations that can be made +to reduce file size and speed up querying of the data, but the ease of +implementing them may depend on your database storage system and the +programming language used to query the data. The above list is a good +starting point. 
+ diff --git a/gtfs-book/ch-14-switching-to-integer-ids.md b/gtfs-book/ch-14-switching-to-integer-ids.md new file mode 100644 index 0000000..4d3914a --- /dev/null +++ b/gtfs-book/ch-14-switching-to-integer-ids.md @@ -0,0 +1,111 @@ +## 14. Switching to Integer IDs + +There are a number of instances in a GTFS feed where IDs are used, such +as to identify routes, trips, stops and shapes. There are no specific +guidelines in GTFS as to the type of data or length an ID can be. As +such, IDs in some GTFS feeds may be anywhere up to 30 or 40 characters +long. + +Using long strings as IDs is extremely inefficient as they make the size +of a database much larger than it needs to be, as well as making +querying the data much slower. + +To demonstrate, consider a GTFS feed where trip IDs are 30 characters +long. If there are 10,000 trips, each with an average of 30 stops in +`stop_times.txt`, then the IDs alone take up 9.3 MB of storage. +Realistically speaking, you need to index the `trip_id` field in order +to look up a trip's stop times quickly, which uses even more space. + +The following SQL statements show how you might represent GTFS without +optimizing the identifiers. For brevity, not all fields from the GTFS +feed are included here. + +```sql +CREATE TABLE trips ( + trip_id TEXT, + route_id TEXT, + service_id TEXT +); + +CREATE INDEX trips_trip_id ON trips (trip_id); + +CREATE INDEX trips_route_id ON trips (route_id); + +CREATE INDEX trips_service_id ON trips (service_id); + +CREATE TABLE stop_times ( + trip_id TEXT, + stop_id TEXT, + stop_sequence INTEGER +); + +CREATE INDEX stop_times_trip_id ON stop_times (trip_id); + +CREATE INDEX stop_times_stop_id on stop_times (stop_id); +``` + +If you were to add an integer column to `trips` called, say, +`trip_index`, then you can reference that value from `stop_times` +instead of `trip_id`. The following SQL statements show this.
+ +```sql +CREATE TABLE trips ( + trip_id TEXT, + trip_index INTEGER, + route_id TEXT, + service_id TEXT +); + +CREATE INDEX trips_trip_id ON trips (trip_id); +CREATE INDEX trips_trip_index ON trips (trip_index); +CREATE INDEX trips_route_id ON trips (route_id); +CREATE INDEX trips_service_id ON trips (service_id); + +CREATE TABLE stop_times ( + trip_index INTEGER, + stop_id TEXT, + stop_sequence INTEGER +); + +CREATE INDEX stop_times_trip_index ON stop_times (trip_index); + +CREATE INDEX stop_times_stop_id on stop_times (stop_id); +``` + +This results in a significant space saving (when you consider how large +`stop_times` can be), as well as being far quicker to look up stop +times based on a trip ID. Note that the original `trip_id` value is +retained so it can be referenced if required. + +Without adding `trip_index`, you would use the following query to find +stop times given a trip ID. + +```sql +SELECT * FROM stop_times + WHERE trip_id = 'SOME_LONG_TRIP_ID' + ORDER BY stop_sequence; +``` + +With the addition of `trip_index`, you need to first find the record +in `trips`. This can be achieved using the following query. This is a +small sacrifice compared to performing string comparison on all stop +times. + +```sql +SELECT * FROM stop_times + WHERE trip_index = ( + SELECT trip_index FROM trips WHERE trip_id = 'SOME_LONG_TRIP_ID' + ) + ORDER BY stop_sequence; +``` + +You can make the same change for the other IDs in the feed, such as +`route_id` and `stop_id`. For these columns you still keep (and +index) the original values in `routes` and `stops` respectively, +since you may still need to look up records based on these values. 
+ +***Note:** Even though this book recommends optimizing feeds in this +manner, the remainder of examples in this book only use their original +IDs, in order to simplify the examples and to ensure compatibility with +the `GtfsToSql` tool introduced previously.* + diff --git a/gtfs-book/ch-15-optimizing-shapes.md b/gtfs-book/ch-15-optimizing-shapes.md new file mode 100644 index 0000000..fbb1125 --- /dev/null +++ b/gtfs-book/ch-15-optimizing-shapes.md @@ -0,0 +1,158 @@ +## 15. Optimizing Shapes + +Shape data in a GTFS feed (that is, the records from `shapes.txt`) +represents a large amount of data. There are a number of ways to reduce +this data, which can help to: + +* Speed up data retrieval +* Reduce the amount of data to transmit to app / web site users +* Speed up rendering of the shape onto a map (such as a native mobile + map or a JavaScript map). + +Two ways to reduce shape data are as follows: + +* **Reducing the number of points in a shape.** The shapes included in + GTFS are often very precise and include a number of redundant + points. Many of these can be removed without a noticeable loss of + shape quality using the *Douglas-Peucker Algorithm*. +* **Encoding all points in a shape into a single value.** The *Encoded + Polyline Algorithm* used in the Google Maps JavaScript API can also + be used with GTFS shapes. This reduces the amount of storage + required and also makes looking up all points in a shape far + quicker. + +### Reducing Points in a Shape + +Many of the shapes you find in GTFS feeds are extremely detailed. They +often follow the exact curvature of the road and may consist of hundreds +or thousands of points for a trip that might have only 30 or 40 stops. + +While this level of detail is useful, the sheer amount of data required +to be rendered on a map can be a massive performance hit from the +perspective of retrieving the data as well as rendering on a map. 
+Realistically, shapes do not need this much detail in order to convey +their message to your users. + +Consider the following shape from Portland that has been rendered using +Google Maps. The total shape consists of 1913 points. + +![Original Shape](images/shape-original.jpeg) + +Compare this now to the same shape that has had redundant points +removed. The total number of points in this shape is 175, which +represents about a 90% reduction. + +![Reduced Shape](images/shape-reduced.jpeg) + +If you look closely, you can see some minor loss of detail, but for the +most part, the shapes are almost identical. + +This reduction in points can be achieved using the Douglas-Peucker +Algorithm. It does so by discarding points that do not deviate +significantly from the line between their surrounding points. + +The Douglas-Peucker Algorithm works as follows: + +* Begin with the first and last points in the path (A and B). These + are always kept. +* Find the point between the first and last that is furthest away from + the line joining the first and last points (the orthogonal distance -- + see the figure below). +* If this point is further away than the allowed distance (the tolerance + level), the point is kept (call it X). +* Repeat this algorithm twice: once using A as the first point and X + as the last point, then again using X as the first point and B as + the last point. + +This algorithm is recursive, and continues until all points have been +checked. + +***Note:** The tolerance level determines how aggressively points are +removed. A higher tolerance value is more aggressive and discards more +data, while a lower tolerance discards less data.* + +The following diagram shows what orthogonal distance means. + +![Orthogonal Distance](images/orthogonal-distance.jpeg) + +The following resources provide more information about the +Douglas-Peucker Algorithm and describe how to implement it in your own +systems: + +* +* +* .
+ +You can often discard about 80-90% of all shape data before seeing a +significant loss of line detail. + +### Encoding Shape Points + +A single entry in `shapes.txt` corresponds to a single point in a +single shape. Each entry includes a shape ID, a latitude and longitude. + +***Note:** The `shape_dist_traveled` field is also included, but you do not +strictly need to use this field (nor the corresponding field in +stop_times.txt). The technique described in this section will not work +if you intend to use `shape_dist_traveled`.* + +This means if you want to look up a shape by its ID, you may need to +retrieve several hundreds of rows from a database. Using the Encoded +Polyline Algorithm you can change your GTFS database so each shape is +represented by a single row in a database. This means the shape can be +found much more quickly and much less data needs to be processed to +determine the shape. + +Consider the following data, taken from TriMet's `shapes.txt` file. +This data represents the first five points of a shape. + +| `shape_id` | `shape_pt_lat` | `shape_pt_lon` | `shape_pt_sequence` | `shape_dist_traveled` | +| :--------- | :------------- | :------------- | :------------------- | :--------------------- | +| `185328` | `45.52291 ` | `-122.677372` | `1` | `0.0` | +| `185328` | `45.522921` | `-122.67737` | `2` | `3.7` | +| `185328` | `45.522991` | `-122.677432` | `3` | `34.0` | +| `185328` | `45.522992` | `-122.677246` | `4` | `81.5` | +| `185328` | `45.523002` | `-122.676567` | `5` | `255.7` | + +If you apply the Encoded Polyline Algorithm to this data, the +coordinates can be represented using the following string. + +``` +eeztGrlwkVAAML?e@AgC +``` + +To learn how to arrive at this value, you can read up on the Encoded +Polyline Algorithm at +. + +Instead of having every single shape point in a single table, you can +create a table that has one record per shape. The following SQL +statement is a way you could achieve this. 
+ +```sql +CREATE TABLE shapes ( + shape_id TEXT, + encoded_shape TEXT +); +``` + +The following table shows how this data could be represented in a +database. + +| `shape_id` | `encoded_shape` | +| :--------- | :--------------------- | +| `185328` | `eeztGrlwkVAAML?e@AgC` | + +Storing the shape in this manner means you can retrieve an entire shape +by looking up only one database row and running it through your decoder. + +To further demonstrate how both the encoding and decoding work, try out +the polyline utility at +. + +You can find implementations for encoding and decoding points for +various languages at the following locations: + +* +* + diff --git a/gtfs-book/ch-16-deleting-unused-data.md b/gtfs-book/ch-16-deleting-unused-data.md new file mode 100644 index 0000000..f2a2fc0 --- /dev/null +++ b/gtfs-book/ch-16-deleting-unused-data.md @@ -0,0 +1,106 @@ +## 16. Deleting Unused Data + +Once you have imported a GTFS feed into a database, it is possible for +there to be a lot of redundant data. This can ultimately slow down any +querying of that data as well as bloating the size of the database. If +you are making an app where the GTFS database is queried on the device +then disk space and computational time are at a premium, so you must do +what you can to reduce resource usage. + +The first thing to check for is expired services. You can do this by +searching `calendar.txt` for entries that expire before today's date. +Be aware though, you also need to ensure there are no +`calendar_dates.txt` entries overriding these services (a service +could have an `end_date` of, say, `20140110`, but also have a +`calendar_dates.txt` entry for `20140131`). + +Firstly, find service IDs in `calendar_dates.txt` that are still +active using the following query.
+ +```sql +SELECT service_id FROM calendar_dates WHERE date >= '20140110'; +``` + +***Note:** In order to improve performance of searching for dates, you +should import the date field in `calendar_dates.txt` as an integer, as +well as `start_date` and `end_date` in `calendar.txt`.* + +Any services matched in this query should not be removed. You can then +find service IDs in `calendar.txt` with the following SQL query. + +```sql +SELECT * FROM calendar WHERE end_date < '20140110' + AND service_id NOT IN ( + SELECT service_id FROM calendar_dates WHERE date >= '20140110' + ); +``` + +Before deleting these services, corresponding trips and stop times must +be removed since you need to know the service ID in order to delete a +trip. Likewise, stop times must be deleted before trips since you need +to know the trip IDs to be removed. + +```sql +DELETE FROM stop_times WHERE trip_id IN ( + SELECT trip_id FROM trips WHERE service_id IN ( + SELECT service_id FROM calendar WHERE end_date < '20140110' + AND service_id NOT IN ( + SELECT service_id FROM calendar_dates WHERE date >= '20140110' + ) + ) + ); +``` + +Now there may be a series of trips with no stop times. Rather than +repeating the above sub-queries, a more thorough way of removing trips +is to remove trips with no stop times. + +```sql +DELETE FROM trips WHERE trip_id NOT IN ( + SELECT DISTINCT trip_id FROM stop_times +); +``` + +With all `service_id` references removed, you can remove the expired +rows from `calendar.txt` using the following SQL query. + +```sql +DELETE FROM calendar WHERE end_date < '20140110' + AND service_id NOT IN ( + SELECT DISTINCT service_id FROM calendar_dates WHERE date >= '20140110' + ); +``` + +The expired rows in `calendar_dates.txt` can also be removed, which +can be achieved using the following query. + +```sql +DELETE FROM calendar_dates WHERE date < '20140110'; +``` + +There may now be some stops that are not used by any trips. These can be +removed using the following query. 
+ +```sql +DELETE FROM stops WHERE stop_id NOT IN ( + SELECT DISTINCT stop_id FROM stop_times +); +``` + +Additionally, you can remove unused shapes and routes using the +following queries. + +```sql +DELETE FROM shapes WHERE shape_id NOT IN ( + SELECT DISTINCT shape_id FROM trips +); + +DELETE FROM routes WHERE route_id NOT IN ( + SELECT DISTINCT route_id FROM trips +); +``` + +There are other potential rows that can be removed (such as records in +`transfers.txt` that reference non-existent stops), but hopefully you +get the idea from the previous queries. + diff --git a/gtfs-book/ch-17-searching-for-trips.md b/gtfs-book/ch-17-searching-for-trips.md new file mode 100644 index 0000000..dd0a901 --- /dev/null +++ b/gtfs-book/ch-17-searching-for-trips.md @@ -0,0 +1,346 @@ +## 17. Searching for Trips + +This section shows you how to search for trips in a GTFS feed based on +specified times and stops. + +The three scenarios covered are: + +* Finding all trips departing from a given stop after a certain time +* Finding all trips arriving at a given stop before a certain time +* Finding all trips between two stops after a given time + +The first and second scenarios are the simplest, because they only rely +on a single end of each trip. The third scenario is more complex because +you have to ensure that each trip returned visits both the start and +finish stops. + +When searching for trips, the first thing you need to know is which +services are running for the given search time. + +### Finding Service IDs + +The first step in searching for trips is to determine which services are +running. To begin with, you need to find service IDs for a given date. +You then need to handle exceptions accordingly. That is, you need to add +service IDs and remove service IDs based on the rules in +`calendar_dates.txt`. + +***Note:** In *Scheduling Past Midnight*, you were shown how +GTFS works with times past midnight.
The key takeaway from this is that +you have to search for trips for two sets of service IDs. This is +covered as this chapter progresses.* + +In Australia, 27 January 2014 (Monday) was the holiday for Australia +Day. This is used as an example to demonstrate how to retrieve service +IDs. + +Firstly, you need the main set of service IDs for the day. The following +SQL query achieves this. + +```sql +SELECT service_id FROM calendar + WHERE start_date <= '20140127' AND end_date >= '20140127' + AND monday = 1; + +# Result: 1, 2, 6, 9, 18, 871, 7501 +``` + +Next you need to find service IDs that are to be excluded, as achieved +by the following SQL query. + +```sql +SELECT service_id FROM calendar_dates + WHERE date = '20140127' AND exception_type = 2; + +# Result: 1, 2, 6, 9, 18, 871, 7501 +``` + +Finally, you need to find service IDs that are to be added. This query +is identical to the previous query, except for the different +`exception_type` value. + +```sql +SELECT service_id FROM calendar_dates + WHERE date = '20140127' AND exception_type = 1; + +# Result: 12, 874, 4304, 7003 +``` + +You can combine all three of these queries into a single query in SQLite +using `EXCEPT` and `UNION`, as shown in the following SQL query. + +```sql +SELECT service_id FROM calendar + WHERE start_date <= '20140127' AND end_date >= '20140127' + AND monday = 1 + +UNION + +SELECT service_id FROM calendar_dates + WHERE date = '20140127' AND exception_type = 1 + +EXCEPT + +SELECT service_id FROM calendar_dates + WHERE date = '20140127' AND exception_type = 2; + +# Result: 12, 874, 4304, 7003 +``` + +Now when you search for trips, only trips that have a matching +`service_id` value are included. + +### Finding Trips Departing a Given Stop + +In order to determine the list of services above, a base timestamp on +which to search is needed. For the purposes of this example, assume that +timestamp is 27 January 2014 at 1 PM (`13:00:00` when using GTFS).
+ +This example searches for all services departing from Adelaide Railway +Station, which has stop ID `6665` in the Adelaide Metro GTFS feed. To +find all matching stop times, the following query can be performed. + +```sql +SELECT * FROM stop_times + WHERE stop_id = '6665' + AND departure_time >= '13:00:00' + AND pickup_type = 0 + ORDER BY departure_time; +``` + +This returns a series of stop times that match the given criteria. The +only problem is it does not yet take into account valid service IDs. + +***Note:** This query may also return stop times that are the final stop +on a trip, which is not useful for somebody trying to find departures. +You may want to modify your database importer to override the final stop +time of each trip so its `pickup_type` has a value of `1` (no pick-up) and +its first stop time so it has a `drop_off_type` of `1` (no drop-off).* + +To make sure only the correct trips are returned, join `stop_times` +with `trips` using `trip_id`, and then include the list of service +IDs. For the purposes of this example the service IDs, stop ID and +departure time are being hard-coded. You can either embed a sub-query, +or include the service IDs via code. + +```sql +SELECT t.*, st.* FROM stop_times st, trips t + WHERE st.stop_id = '6665' + AND st.trip_id = t.trip_id + AND t.service_id IN ('12', '874', '4304', '7003') + AND st.departure_time >= '13:00:00' + AND st.pickup_type = 0 + ORDER BY st.departure_time; +``` + +This gives you the final list of stop times matching the desired +criteria. You can then decide specifically which data you need to +retrieve; you now have the `trip_id`, meaning you can find all stop +times for a given trip if required. + +If you need to restrict the results to only those that occur after the +starting stop, you can retrieve stop times with only a `stop_sequence` +larger than that of the stop time returned in the above query. 
+ +### Finding Trips Arriving at a Given Stop + +In order to find the trips arriving at a given stop before a specified +time, it is just a matter of making slight modifications to the above +query. Firstly, check the `arrival_time` instead of +`departure_time`. Also, check the `drop_off_type` value instead of +`pickup_type`. + +```sql +SELECT t.*, st.* FROM stop_times st, trips t + WHERE st.stop_id = '6665' + AND st.trip_id = t.trip_id + AND t.service_id IN ('12', '874', '4304', '7003') + AND st.arrival_time <= '13:00:00' + AND st.drop_off_type = 0 + ORDER BY st.arrival_time DESC; +``` + +For this particular data set, there are trips from four different routes +returned. If you want to restrict this to a particular route, you can +filter on the `t.route_id` value. + +```sql +SELECT t.*, st.* FROM stop_times st, trips t + WHERE st.stop_id = '6665' + AND st.trip_id = t.trip_id + AND t.service_id IN ('12', '874', '4304', '7003') + AND t.route_id = 'BEL' + AND st.arrival_time <= '13:00:00' + AND st.drop_off_type = 0 + ORDER BY st.arrival_time DESC; +``` + +### Performance of Searching Text-Based Times + +These examples search using text-based arrival/departure times (such as +`13:00:00`). This works because the GTFS specification mandates that +all times are `HH:MM:SS` format (although `H:MM:SS` is allowed for times +earlier than 10 AM). + +Doing this kind of comparison (especially if you are scanning millions +of rows) is quite slow and expensive. It is more efficient to convert +all times stored in the database to integers that represent the number +of seconds since midnight. + +***Note:** The GTFS specification states that arrival and departure times +are "noon minus 12 hours" in order to account for daylight savings time. +This is effectively midnight, except for the days that daylight savings +starts or finishes.* + +In order to achieve this, you can convert the text-based time to an +integer with `H * 3600 + M * 60 + S`. 
For example, `13:35:21` can +be converted using the following steps. + +``` + (13 * 3600) + (35 * 60) + (21) += 46800 + 2100 + 21 += 48921 +``` + +You can then convert back to hours, minutes and seconds in order to +generate timestamps in your application as shown in the following +algorithm. + +``` +H = floor( 48921 / 3600 ) + = floor( 13.59 ) + = 13 + +M = floor( 48921 / 60 ) % 60 + = floor( 815.35 ) % 60 + = 815 % 60 + = 35 + +S = 48921 % 60 + = 21 +``` + +### Finding Trips Between Two Stops + +Now that you know how to look up a trip from or to a given stop, the +previous query can be expanded so both the start and finish stop are +specified. The following example finds trips that depart after 1 PM. Only +trips departing from *Adelaide Railway Station* +(stop ID `6665`) are shown. Additionally, only trips that then visit +*Blackwood Railway Station* (stop IDs `6670` and `101484`) are +included. + +In order to achieve this, the following changes must be made to the +previous examples. + +* **Join against stop_times twice.** Once for the departure stop time + and once for the arrival stop time. +* **Allow for multiple stop IDs at one end.** The destination in this + example has two platforms, so you need to check both of them. +* **Ensure the departure time is earlier than the arrival time.** + Otherwise trips heading in the opposite direction may also be + returned. + +The following query demonstrates how this is achieved. + +```sql +SELECT t.*, st1.*, st2.* + FROM trips t, stop_times st1, stop_times st2 + WHERE st1.trip_id = t.trip_id + AND st2.trip_id = t.trip_id + AND st1.stop_id = '6665' + AND st2.stop_id IN ('6670', '101484') + AND t.service_id IN ('12', '874', '4304', '7003') + AND st1.departure_time >= '13:00:00' + AND st1.pickup_type = 0 + AND st2.drop_off_type = 0 + AND st1.departure_time < st2.arrival_time + ORDER BY st1.departure_time; +``` + +In this example, the table alias `st1` is used for the departure stop +time.
Once again, the stop ID must match, as well as the departure time +and pick-up type. + +For the arrival stop time the alias `st2` is used. This table also +joins the `trips` table using `trip_id`. Since the destination has +multiple stop IDs, the SQL `IN` construct is used. The arrival time is +not important in this example, so only the departure is checked. + +The final thing to check is that the departure occurs before the +arrival. If you do not perform this step, then trips traveling in the +opposite direction may also be returned. + +***Note:** Technically, there may be multiple results returned for the +same trip. For some transit agencies, a single trip may visit a stop +more than once. If this is the case, you should also check the trip +duration (arrival time minus departure time) and use the shortest trip +when the same trip is returned multiple times.* + +### Accounting for Midnight + +As discussed previously, GTFS allows trips to depart or +arrive after midnight for a given service day without having to specify +it as part of the next service day. Consequently, while the queries +above are correct, they do not necessarily paint the full picture. + +In reality, when performing a trip search, you need to take into account +trips that have wrapped times (for instance, where 12:30 AM is specified +as `24:30:00`). If you want to find trips that depart after 12:30 AM +on a given day, you need to check for trips departing after +`00:30:00` on that day, as well as for trips departing after +`24:30:00` on the previous day. + +This means that for each trip search you are left with two sets of +trips, which you must then merge and present as appropriate. + +***Note:** In reality, agencies generally do not have trips that overlap +from multiple service days, so technically you often only need one query +(for example, a train service might end at 12:30 AM then restart on the +next service day at 4:30 AM).
If your app / web site only uses a single +feed where you can tune your queries manually based on how the agency +operates, then you can get away with only querying a single service day. +On the other hand, if you are building a scalable system that works with +data from many agencies, then you need to check both days.* + +To demonstrate how this works in practice, the following example +searches for all trips that depart after 12:30:00 AM on 14 March 2014. +The examples earlier in this chapter showed how to find the service IDs +for a given date. To account for midnight, service IDs for both March 14 +(the "main" service date) and March 13 (the overlapping date) need to be +determined. + +Assume that March 14 has a service ID of `C1` and March 13 has a +service ID of `C2`. First you need to find the departures for March +14, as shown in the following query. + +```sql +SELECT t.*, st.* FROM stop_times st, trips t + WHERE st.stop_id = 'S1' + AND st.trip_id = t.trip_id + AND t.service_id IN ('C1') + AND st.departure_time >= '00:30:00' + AND st.pickup_type = 0 + ORDER BY st.departure_time; +``` + +The resultant list needs to be combined with the trips that depart after +midnight from the March 13 service. To check this, it is just a matter +of swapping in the right service IDs, then adding 24 hours to the search +time. + +```sql +SELECT t.*, st.* FROM stop_times st, trips t + WHERE st.stop_id = 'S1' + AND st.trip_id = t.trip_id + AND t.service_id IN ('C2') + AND st.departure_time >= '24:30:00' + AND st.pickup_type = 0 + ORDER BY st.departure_time; +``` + +In order to get your final list of trips you must combine the results +from both of these queries. If you are generating complete timestamps in +order to present the options to your users, just remember that the times +from the second query are relative to the previous service day.
+ diff --git a/gtfs-book/ch-18-working-with-trip-blocks.md b/gtfs-book/ch-18-working-with-trip-blocks.md new file mode 100644 index 0000000..d59b2c4 --- /dev/null +++ b/gtfs-book/ch-18-working-with-trip-blocks.md @@ -0,0 +1,109 @@ +## 18. Working With Trip Blocks + +One of the more complex aspects of GTFS is how to properly use the +`block_id` field in `trips.txt`. The concept is simple, but +incorporating this information in a manner that is simple to understand +for a passenger can be more difficult. + +A single vehicle (such as a bus or train) completes multiple trips in a +single service day. For instance, once a bus completes its trip (Trip X) +from Location A to Location B, it then begins another trip (Trip Y) from +Location B to Location C. It then completes a final trip (Trip Z) from +Location C back to Location A. + +If a passenger boards in the middle of Trip X and is allowed to stay on +the bus until the end of Trip Y, then this should be represented in +GTFS by giving Trips X, Y and Z the same `block_id` value. + +The following diagram shows a vehicle that performs two trips, +completing opposite directions of the same route. Often a single vehicle +will do many more than just two trips in a day. + +![Block](images/block-01.png) + +***Note:** When trips in a block are represented in `stop_times.txt`, the +final stop of a trip and the first stop of the subsequent trip must both +be included, even though they are typically at the same stop (and often +the same arrival & departure time). The final stop of the first trip is +drop-off only, while the first stop of the next trip is pick-up only.* + +In the following diagram, the vehicle completes three trips, each for +separate routes. As in the previous diagram, the vehicle will likely +complete more trips over a single day. + +![Block](images/block-02.png) + +These two diagrams show the difficulties that can arise when trying to +calculate trips across multiple blocks.
The first diagram is repeated +below, this time with stops marked so trips between two locations can be +found. + +![Block](images/block-03.png) + +Consider the scenario where you want to retrieve all trip times from +Stop S2 to Stop S1. Since the vehicle gets to Stop S3 then turns around +and heads back to S1, the trip search returns two options: + +1. Board at S2 on the upper trip, travel via S3, then get to S1. +2. Board at S2 on the lower trip, travel directly to S1. + +The second option is a subset of the first option, which means it is a +far shorter trip (plus it avoids the passenger getting annoyed from +passing their starting point again). To determine which option to use, +you can use the trip duration. + +Now consider the other type of block formation, where a vehicle +completes subsequent trips from different routes. + +![Block](images/block-04.png) + +If a passenger wants to travel from Stop S2 to S4, you will not get the +situation where the vehicle travels past the same location twice. +However, it is also possible for a passenger to travel from Stop S2 to +Stop S6 without ever having to change vehicles. + +Referring back to the previous chapter about performing trip searches, +it is relatively straightforward to account for blocks in these +searches. Using the query to find trips between two stops as a +reference, the following changes need to be made: + +* Instead of joining both occurrences of `stop_times` to the + `trips` table, you now need two occurrences of `trips`. +* Each `stop_times` table occurrence is joined to a separate + `trips` table occurrence. +* The two occurrences of `trips` are joined on the `block_id` + value (if it is available). + +The following query shows how to find trips that start at stop `S2` +and finish at stop `S4`, departing after 1 PM on the day with service +ID `C1`. 
+ +```sql +SELECT t1.*, t2.*, st1.*, st2.* + FROM trips t1, trips t2, stop_times st1, stop_times st2 + WHERE st1.trip_id = t1.trip_id + AND st2.trip_id = t2.trip_id + AND st1.stop_id = 'S2' + AND st2.stop_id = 'S4' + AND t1.service_id = 'C1' + AND ( + t1.trip_id = t2.trip_id + OR ( + LENGTH(t1.block_id) > 0 AND t1.block_id = t2.block_id + ) + ) + AND st1.departure_time >= '13:00:00' + AND st1.pickup_type = 0 + AND st2.drop_off_type = 0 + AND st1.departure_time < st2.arrival_time + ORDER BY st1.departure_time; +``` + +Since `block_id` may be unpopulated, the query matches on either the +`trip_id` or the `block_id`. + +***Note:** An alternative is to guarantee every single trip has a +`block_id` value when importing -- even if some blocks only consist of +a single trip. If you can guarantee this condition, then you can +simplify this query by using `t1.block_id = t2.block_id`.* + diff --git a/gtfs-book/ch-19-calculating-fares.md b/gtfs-book/ch-19-calculating-fares.md new file mode 100644 index 0000000..df24dd4 --- /dev/null +++ b/gtfs-book/ch-19-calculating-fares.md @@ -0,0 +1,296 @@ +## 19. Calculating Fares + +In order to calculate a fare for a trip in GTFS, you must use data from +`fare_attributes.txt` and `fare_rules.txt`. It is relatively +straightforward to calculate the cost of a single trip (that is, +boarding a vehicle, traveling for a number of stops, then disembarking), +but it becomes much more complicated when you need to take into account +multiple trip segments (that is, one or more transfers). + +***Note:** As it stands, many feed providers do not include fare +information. This is because many systems have a unique set of rules +that cannot be modelled with the current structure of fares in GTFS. +Additionally, it is not possible to determine different classes of +pricing (such as having a separate price for adults and children).
For +the purposes of this chapter, these limitations are ignored.* + +For more discussion on how fares work in GTFS, refer to *Fare +Definitions (`fare_attributes.txt` & `fare_rules.txt`)*. + +This chapter first shows you how to calculate fares for a single trip, +then how to account for transfers and for multiple trips. + +### Calculating a Single Trip Fare + +Much of the logic used when calculating fares requires knowledge of the +zones used in a trip. + +***Note:** A zone is a physical area within a transit system that +contains a series of stops. Zones are used to group trip pricing into key +areas in a system. Some transit systems do not work like this (for +instance, they may measure the physical distance travelled rather than +specific stops), which is one reason why the GTFS fares model does not +work in all cases.* + +A zone is defined in GTFS by the `zone_id` column in `stops.txt`. A +single stop can only belong to one zone. + +Fares are matched to a trip using a combination of any of the following: + +* The route of the trip +* The zone of the stop the passenger boards at +* The zone of the stop the passenger disembarks at +* The zone(s) of any stops on the trip that are passed while the + passenger is on board. + +Consider the following simplified data set that may appear in +`stops.txt` and `stop_times.txt`. Assume for this example that the +trip `T1` belongs to a route with an ID of `R1`. + +``` +stop_id,zone_id +S1,Z1 +S2,Z1 +S3,Z2 +S4,Z3 + +trip_id,stop_id,stop_sequence +T1,S1,1 +T1,S2,2 +T1,S3,3 +T1,S4,4 +``` + +If a passenger travels from stop S1 to stop S4, then their starting zone +is Z1, their finishing zone is Z3, and the zones they pass through are +Z1, Z2 and Z3. + +***Note:** When calculating fares, the start and finish zones are also +included in the zones passed through, so in this example Z1 and Z3 are also +considered zones that the trip passes through.* + +Using this data, you can now calculate matching fares.
To do so, you +need to find all fares that match either of the following: + +* Fares that have no associated rules. +* Fares that have rules that match the specified trip. If a fare + defines multiple zones that must be passed through (using + `contains_id`), then all zones must be matched. + +If multiple fares qualify for a trip, then the cheapest fare is the one +to use. + +### Finding Fares With No Rules + +This is the simplest use-case for fares. You can find all matching fares +with the following SQL. + +```sql +SELECT * FROM fare_attributes WHERE fare_id NOT IN ( + SELECT DISTINCT fare_id FROM fare_rules +); +``` + +If a feed only has `fare_attributes.txt` records with no rules, then +the difference between the fares is in the transfer rules. This section +only covers calculating fares for a single trip with no transfers, so +for now you can just select the cheapest fare using the following SQL. + +```sql +SELECT * FROM fare_attributes WHERE fare_id NOT IN ( + SELECT DISTINCT fare_id FROM fare_rules + ) + ORDER BY price + 0 LIMIT 1; +``` + +***Note:** You still need to check for fares with one or more rules in +order to find the cheapest price. Also, `0` is added in this query in +order to cast a string to a number. When you roll your own importer you +should instead import this as a numerical value.* + +### Finding Fares With Matched Rules + +Next you must check against specific rules for a fare. In order to do +this, you need the starting zone, finishing zone, and all zones passed +through (including the start and finish zones). + +Referring back to the previous example, if a trip starts at `Z1`, +passes through `Z2` and finishes at `Z3`, you can find fare +candidates (that is, trips that *may* match), using the following SQL +query. 
+ +```sql +SELECT * FROM fare_attributes WHERE fare_id IN ( + SELECT fare_id FROM fare_rules + WHERE (LENGTH(route_id) = 0 OR route_id = 'R1') + AND (LENGTH(origin_id) = 0 OR origin_id = 'Z1') + AND (LENGTH(destination_id) = 0 OR destination_id = 'Z3') + AND (LENGTH(contains_id) = 0 OR contains_id IN ('Z1', 'Z2', 'Z3') + ) + ); +``` + +This returns a list of fares that may qualify for the given trip. As +some fares have multiple rules, all must be checked. The algorithm to +represent this is as follows. + +``` +fares = [ result from above query ] + +qualifyingFares = [ ] + +for (fare in fares) { + if (qualifies(fare)) + qualifyingFares.add(fare) +} + +allFares = qualifyingFares + faresWithNoRules + +passengerFare = cheapest(allFares) +``` + +As shown on the final two lines, once you have the list of qualifying +fares, you can combine these with fares that have no rules (from the +previous section) and then determine the cheapest fare. + +First though, you must determine if a fare with rules qualifies for the +given trip. If a fare specifies zones that must be passed through, then +all rules must be matched. + +***Note:** If a particular rule specifies a different route, start, or +finish than the one you are checking, you do not need to ensure the +`contains_id` matches, since this rule no longer applies. You still need +to check the other rules for this fare.* + +The algorithm needs to build up a list of zone IDs from the fare rules +in order to check against the trip. Once this has been done, you need to +check that every zone ID collected from the rules is contained in the +trip's list of zones.
+ +``` +qualifies(fare, routeId, originId, destinationId, containsIds) { + + fareContains = [ ] + + for (rule in fare.rules) { + if (rule.contains.length == 0) + continue + + if (rule.route.length > 0 AND rule.route != routeId) + continue + + if (rule.origin.length > 0 AND rule.origin != originId) + continue + + if (rule.destination.length > 0 AND rule.destination != destinationId) + continue + + fareContains.add(rule.contains); + } + + if (fareContains.size == 0) + return YES + + if (containsIds HAS EVERY ELEMENT IN fareContains) + return YES + else + return NO +} +``` + +This algorithm achieves the following: + +* Only rules that have a value for `contains_id` are relevant. Rules + that do not have this value fall through and should be considered as + qualified. +* If the route is specified but not equal to the one being checked, it + is safe to ignore the rule's `contains_id`. If the route is empty + or equal, the loop iteration can continue. +* Check for the `origin_id` and `destination_id` in the same + manner as `route_id`. +* If the route, origin and destination all qualify then store the + `contains_id` so it can be checked after the loop. + +The algorithm returns *yes* if the fare qualifies, meaning you can save +it as a qualifying fare. You can then return the cheapest qualifying +fare to the user. + +### Calculating Trips With Transfers + +Once you introduce transfers, fare calculation becomes more complicated. +A "trip with a transfer" is considered to be a trip where the passenger +boards a vehicle, disembarks, and then gets on another vehicle. For +example: + +* Travel on trip T1 from Stop S1 to Stop S2 +* Walk from Stop S2 to Stop S3 +* Travel on trip T2 from Stop S3 to Stop S4. + +In order to calculate the total fare for a trip with transfers, the +following algorithm is used: + +1. Retrieve list of qualifying fares for each trip individually +2. Create a list of every fare combination possible +3. 
Loop over all combinations and find the total cost +4. Return the lowest cost from Step 3. + +Step 1 was covered in *Calculating a Single Trip Fare*, but +you must skip the final step of finding the cheapest fare. This is +because the cheapest fare may change depending on subsequent transfers. +Instead, this step is performed once the cheapest *combination* is +determined. + +To demonstrate Step 2, consider the following example: + +* The trip on T1 from S1 to S2 yields the following qualifying fares: + F1, F2. +* The subsequent trip on T2 from S3 to S4 yields the following + qualifying fares: F3, F4. + +Generating every combination of these fares yields the following +possibilities: + +* F1 + F3 +* F1 + F4 +* F2 + F3 +* F2 + F4. + +Step 3 can now be performed, which involves finding the total cost for +each combination. As you need to take into account the possibility of +timed transfers (according to the data stored in +`fare_attributes.txt`), you also need to know about the times of these +trips. + +The following algorithm can be used to calculate the total cost using +transfer rules. In this example, you would call this function once for +each fare combination. + +``` +function totalCost(fares) { + total = 0 + + for (fare in fares) { + freeTransfer = NO + + if (previousFare ALLOWS TRANSFERS) { + if (HAS ENOUGH TRANSFERS REMAINING) { + if (TRANSFER NOT EXPIRED) { + freeTransfer = YES + } + } + } + + if (!freeTransfer) + total = total + fare.price; + + previousFare = fare; + } + + return total; +} +``` + +Once the `totalCost` algorithm has been called for every combination, you will +have a total price for each combination. You can then return the lowest price as the +final price for the trip. + diff --git a/gtfs-book/ch-20-trip-patterns.md b/gtfs-book/ch-20-trip-patterns.md new file mode 100644 index 0000000..4b0cf73 --- /dev/null +++ b/gtfs-book/ch-20-trip-patterns.md @@ -0,0 +1,141 @@ +## 20. 
Trip Patterns + +In a GTFS feed, a route typically has multiple trips that start and +finish at the same stops. If you are looking to reduce the size of the +data stored, then converting data from `stop_times.txt` into a series +of reusable patterns is an excellent way to do so. + +For two trips to share a common pattern, the following must hold true: + +* The stops visited and the order in which they are visited must be the same +* The time differences between each stop must be the same. + +The following table shows some fictional trips to demonstrate this. + +| **Stop** | **Trip 1** | **Trip 2** | **Trip 3** | +| :------- | :--------- | :--------- | :--------- | +| S1 | 10:00:00 | 10:10:00 | 10:20:00 | +| S2 | 10:02:00 | 10:13:00 | 10:22:00 | +| S3 | 10:05:00 | 10:15:00 | 10:25:00 | +| S4 | 10:06:00 | 10:18:00 | 10:26:00 | +| S5 | 10:10:00 | 10:21:00 | 10:30:00 | + +In a GTFS feed, this would correspond to 15 records in +`stop_times.txt`. If you look more closely though, you can see the +trips are very similar. The following table shows the differences +between each stop time, instead of the actual time. + +| **Stop** | **Trip 1** | **Trip 2** | **Trip 3** | +| :------- | :-------------- | :-------------- | :-------------- | +| S1 | 00:00:00 | 00:00:00 | 00:00:00 | +| S2 | 00:02:00 (+2m) | 00:03:00 (+3m) | 00:02:00 (+2m) | +| S3 | 00:05:00 (+5m) | 00:05:00 (+5m) | 00:05:00 (+5m) | +| S4 | 00:06:00 (+6m) | 00:08:00 (+8m) | 00:06:00 (+6m) | +| S5 | 00:10:00 (+10m) | 00:11:00 (+11m) | 00:10:00 (+10m) | + +You can see from this table that the first and third trip, although they +start at different times, have the same offsets between stops (as well +as stopping at identical stops). + +Instead of using a table to store stop times, you can store patterns. By +storing the ID of the pattern with each trip, you can reduce the list of +stop times in this example from 15 to 10. 
As only time offsets are +stored for each pattern, the trip starting time also needs to be saved +with each trip. + +You could use SQL such as the following to model this. + +```sql +CREATE TABLE trips ( + trip_id TEXT, + pattern_id INTEGER, + start_time TEXT, + start_time_secs INTEGER +); + +CREATE TABLE patterns ( + pattern_id INTEGER, + stop_id TEXT, + time_offset INTEGER, + stop_sequence INTEGER +); +``` + +The data you would store for trips in this example is shown in the +following table. + +| `trip_id` | `pattern_id` | `start_time` | `start_time_secs` | +| :-------- | :----------- | :----------- | :---------------- | +| T1 | 1 | 10:00:00 | 36000 | +| T2 | 2 | 10:10:00 | 36600 | +| T3 | 1 | 10:20:00 | 37200 | + +***Note:** The above table includes start_time_secs, which is an integer +value representing the number of seconds since the day started. Using +the hour, minutes and seconds in start_time, this value is `H * 3600 + M * 60 + S`.* + +In the `patterns` table, you would store data as in the following +table. + +| `pattern_id` | `stop_id` | `time_offset` | `stop_sequence` | +| :----------- | :-------- | :------------ | :-------------- | +| 1 | S1 | 0 | 1 | +| 1 | S2 | 120 | 2 | +| 1 | S3 | 300 | 3 | +| 1 | S4 | 360 | 4 | +| 1 | S5 | 600 | 5 | +| 2 | S1 | 0 | 1 | +| 2 | S2 | 180 | 2 | +| 2 | S3 | 300 | 3 | +| 2 | S4 | 480 | 4 | +| 2 | S5 | 660 | 5 | + +As you can see, this represents an easy way to significantly reduce the +amount of data stored. You could have tens or hundreds of trips each +sharing the same pattern. When you scale this to the entire feed, this +could reduce, say, 3 million records to about 200,000. + +***Note:** This is a somewhat simplified example, as there is other data +available in `stop_times.txt` (such as separate arrival/departure times, +drop-off type and pick-up type). 
You should take all of this data into +account when determining how to allocate patterns.* + +### Updating Trip Searches + +Changing your model to reuse patterns instead of storing every stop time +means your data lookup routines must also be changed. + +For example, to find all stop times for a given trip, you must now find +the pattern using the following SQL query. + +```sql +SELECT * FROM patterns + WHERE pattern_id = (SELECT pattern_id FROM trips WHERE trip_id = 'YOUR_TRIP_ID') + ORDER BY stop_sequence; +``` + +If you want to determine the arrival/departure time, you must add the +offset stored for the pattern record to the starting time stored with +the trip. This involves joining the tables and adding `time_offset` to +`start_time_secs`, as shown in the following query. + +```sql +SELECT t.start_time_secs + p.time_offset, p.stop_id + FROM patterns p, trips t + WHERE p.pattern_id = t.pattern_id + AND t.trip_id = 'YOUR_TRIP_ID' + ORDER BY p.stop_sequence; +``` + +### Other Data Reduction Methods + +There are other ways you can reduce the amount of data, such as only +using patterns to store the stops (and not timing offsets), and then +storing the timings with each trip record. A technique such as this +further reduces the size of the database, but the trade-off is that +querying the data becomes slightly more complex. + +Hopefully you can see that by using the method described in this chapter +there are a number of ways to be creative with GTFS data, and that you +must make decisions when it comes to speed, size, and ease of querying +data. diff --git a/gtfs-book/ch-21-conclusion.md b/gtfs-book/ch-21-conclusion.md new file mode 100644 index 0000000..0bdc2e0 --- /dev/null +++ b/gtfs-book/ch-21-conclusion.md @@ -0,0 +1,23 @@ +## Conclusion + +Thanks for reading *The Definitive Guide to GTFS*. While there are many +techniques that may take some time to comprehend in this book, the +content should bring you up to speed with GTFS quickly. 
+ +At first glance, GTFS appears to be very simple, but there are a number +of "gotchas" which are not immediately apparent until you have spent +significant time working with a range of feeds. + +The key takeaways from this book are: + +* How GTFS feeds are structured +* How to import a GTFS feed to SQL and perform queries +* How to search for trips, handle blocks and calculate fares +* How to optimize an SQL database to minimize resource usage on mobile + devices. + +If you have enjoyed this book, please share it or feel free to contribute improvements or changes as necessary. + +*Quentin Zervaas* +February, 2014 + diff --git a/gtfs-book/images/agency-google-maps.png b/gtfs-book/images/agency-google-maps.png new file mode 100644 index 0000000..b9e7c2a Binary files /dev/null and b/gtfs-book/images/agency-google-maps.png differ diff --git a/gtfs-book/images/block-01.png b/gtfs-book/images/block-01.png new file mode 100644 index 0000000..9408632 Binary files /dev/null and b/gtfs-book/images/block-01.png differ diff --git a/gtfs-book/images/block-02.png b/gtfs-book/images/block-02.png new file mode 100644 index 0000000..162e5ea Binary files /dev/null and b/gtfs-book/images/block-02.png differ diff --git a/gtfs-book/images/block-03.png b/gtfs-book/images/block-03.png new file mode 100644 index 0000000..345cd72 Binary files /dev/null and b/gtfs-book/images/block-03.png differ diff --git a/gtfs-book/images/block-04.png b/gtfs-book/images/block-04.png new file mode 100644 index 0000000..ec384b5 Binary files /dev/null and b/gtfs-book/images/block-04.png differ diff --git a/gtfs-book/images/orthogonal-distance.jpeg b/gtfs-book/images/orthogonal-distance.jpeg new file mode 100644 index 0000000..9c88323 Binary files /dev/null and b/gtfs-book/images/orthogonal-distance.jpeg differ diff --git a/gtfs-book/images/shape-original.jpeg b/gtfs-book/images/shape-original.jpeg new file mode 100644 index 0000000..5d72050 Binary files /dev/null and 
b/gtfs-book/images/shape-original.jpeg differ diff --git a/gtfs-book/images/shape-reduced.jpeg b/gtfs-book/images/shape-reduced.jpeg new file mode 100644 index 0000000..cf6f7cd Binary files /dev/null and b/gtfs-book/images/shape-reduced.jpeg differ diff --git a/gtfs-book/images/stop-times.png b/gtfs-book/images/stop-times.png new file mode 100644 index 0000000..cdce53d Binary files /dev/null and b/gtfs-book/images/stop-times.png differ diff --git a/gtfs-book/images/stops-sample.png b/gtfs-book/images/stops-sample.png new file mode 100644 index 0000000..ec0e5c7 Binary files /dev/null and b/gtfs-book/images/stops-sample.png differ diff --git a/gtfs-realtime-book/ch-00-definitive-guide-to-gtfs-realtime.md b/gtfs-realtime-book/ch-00-definitive-guide-to-gtfs-realtime.md new file mode 100644 index 0000000..0184448 --- /dev/null +++ b/gtfs-realtime-book/ch-00-definitive-guide-to-gtfs-realtime.md @@ -0,0 +1,58 @@ +# The Definitive Guide to GTFS-realtime + +*How to consume and produce real-time public transportation data with the GTFS-rt specification.* + +Originally written by Quentin Zervaas. + +## About This Book + +This book is a comprehensive guide to GTFS-realtime, a specification for +publishing of real-time public transportation data. GTFS-realtime is +designed to complement the scheduled data that hundreds of transit +agencies around the world publish using GTFS (General Transit Feed +Specification). + +This book begins with a description of the specification, with +discussion about the three types of data contained in GTFS-realtime +feeds: service alerts; vehicle positions; and trip updates. + +Next the reader is introduced to *Protocol Buffers*, the data format +that GTFS-realtime uses when it is being transmitted. This section then +instructs the reader how to consume the three types of data from +GTFS-realtime feeds (both from standard feeds and feeds with +*extensions*). + +Finally, the reader is shown how to produce a GTFS-realtime feed. 
A +number of examples in this book use Java, but the lessons can be applied +to a number of different languages. + +This book complements *The Definitive Guide to GTFS*. + +### About The Author + +Quentin was the founder of TransitFeeds (now ), a web +site that provides a comprehensive listing of public transportation data +available around the world. This site is referenced various times +throughout this book. + +### Credits + +First Edition. Published in August 2015. + +**Technical Reviewer** + +Nick Maher + +**Copy Editors** + +Anne Zervaas +Miranda Little + +**Disclaimer** + +The information in this book is distributed on an "as is" basis, without +warranty. Although every precaution has been taken in the preparation of +this work, the author shall not be liable to any person or entity with +respect to any loss or damage caused or alleged to be caused directly or +indirectly by the information contained in this book. + diff --git a/gtfs-realtime-book/ch-01-introduction.md b/gtfs-realtime-book/ch-01-introduction.md new file mode 100644 index 0000000..eff0ddf --- /dev/null +++ b/gtfs-realtime-book/ch-01-introduction.md @@ -0,0 +1,81 @@ +## 1. Introduction to GTFS-realtime + +GTFS-realtime is a standard developed by Google in order to allow +transit agencies to provide real-time information about their service. + +There are three types of data a GTFS-realtime feed provides: + +1. Vehicle positions +2. Trip updates +3. Service alerts + +Vehicle positions contain data about events that have already occurred +(e.g. "the vehicle was at this location one minute ago"), whereas trip +updates contain data about events that are yet to occur (e.g. "the bus +will arrive in three minutes"). + +Typically, a single GTFS-realtime feed contains only one of these three +types of data. Many agencies therefore have multiple GTFS-realtime feeds +(that is, one for vehicle positions, one for trip updates and one for +service alerts). 
+ +![GTFS-realtime structure](images/GTFS-realtime-structure.png) + +The above diagram shows how a GTFS-realtime feed is designed to +complement a GTFS (General Transit Feed Specification) feed. It does +this in two ways: + +1. All identifiers for routes, trips and stops match those that appear + in the corresponding GTFS feed. +2. A GTFS feed shows the projected schedule for a given period (such as + the next six months), while the GTFS-realtime is used to make + last-minute adjustments based on real-world conditions (such as + traffic, roadworks, or weather). + +### Consuming GTFS-realtime Feeds + +The format of GTFS-realtime feeds is based on Protocol Buffers, a +language and platform-neutral mechanism for serializing structured data. + +This is similar conceptually to JSON (JavaScript Object Notation), but +the data transferred across the wire is binary data and not +human-readable in its raw format. + +*Chapter 5. Protocol Buffers* shows you how to use Protocol +Buffers and the associated **gtfs-realtime.proto** file (used to +instruct Protocol Buffers how GTFS-realtime is structured). + +Consuming GTFS-realtime Feeds on Mobile Devices + +A common use-case for GTFS and GTFS-realtime feeds is to build +transit-related mobile apps that show scheduling data. However, it is +important to note that GTFS-realtime feeds are not intended to be +consumed directly by a mobile device. + +Many of the vehicle positions and trip update feeds provided by transit +agencies include a snapshot of their entire network at a single moment. +For a large network, this could be multiple megabytes of data being +updated every 10-15 seconds. + +A mobile app downloading a full GTFS-realtime feed every 10-15 seconds +would quickly download a large amount of data over their cellular +connection, which could be very expensive. Additionally, their device +would need to process a large amount of data -- most of which would not +be relevant -- which would run down their battery unnecessarily. 
This +would also put a huge amount of strain on the provider's servers. + +Rather, GTFS-realtime is intended to be consumed by an intermediate +server. In the case of a mobile app, this intermediate server would +likely belong to the creator of the app. The mobile app can then query +this intermediate server for the relevant data it needs at that time. + +![Direct or Intermediate](images/GTFS-realtime-direct-or-intermediate.png) + +The above diagram demonstrates the different models. On the left, mobile +devices download entire GTFS-realtime feeds from the provider. Each +device is downloading 5 megabytes every 10-15 seconds. + +On the right, an intermediate server records all vehicle positions, then +mobile devices request only the data they need. This significantly +reduces the amount of data transferred. + diff --git a/gtfs-realtime-book/ch-02-service-alerts.md b/gtfs-realtime-book/ch-02-service-alerts.md new file mode 100644 index 0000000..6fb81c5 --- /dev/null +++ b/gtfs-realtime-book/ch-02-service-alerts.md @@ -0,0 +1,363 @@ +## 2. Introduction to Service Alerts + +Service alerts are generally used by transit agencies to convey +information that can not be conveyed using a trip update message (the +GTFS-realtime trip update message is covered in *Chapter 4. Introduction +to Trip Updates*). + +For example, consider a bus stop that was to be closed for a period of +time due to construction in the area. If the stop was to be closed for a +long period of time, the transit agency could modify the long-term +schedule (the GTFS feed). If the closure was unexpected and the stop +will reopen later that day, the agency can reflect this temporary +closure using trip updates in the GTFS-realtime feed (instructing each +relevant trip to skip that stop). + +However, in both of these cases the reason *why* there were no trips +visiting the stop has not been conveyed. Using a service alert, you can +explain why the stop is closed and when it will reopen. 
+ +You can attach one or more entities to a service alert (such as routes, +trips or stops). In the above example of a stop being closed, you could +include its stop ID as it appears in the corresponding GTFS feed, +thereby allowing apps consuming the feed to display a message to their +users. + +### Examples of Service Alerts + +Some examples of common service alerts used by agencies include: + +* **Holiday schedules.** If there is an upcoming holiday, agencies may + use service alerts to remind travelers that a holiday schedule will + be applied that day. +* **Stop closing.** If a stop is closed (temporarily or permanently) + customers may be notified using a service alert. In this instance, + the alert can be linked to the stop that is closing. If it is being + replaced by a new stop, the new stop may also be linked. +* **Route detour.** Service alerts can be used to indicate that for a + period of time in the future a route will be redirected, perhaps due + to a road closure. In this instance, the alert would link to stops + that lie on the closed part of the road, as well as to routes that + will detour as a result of the closure. +* **Change to schedule.** If an upcoming change to a schedule results + in far less (or far more) services operating a stop, service alerts + might be used to notify customers. +* **Vehicle broken down.** If a bus has broken down, or if electric + trains are not moving due to a power outage, passengers can be + notified using service alerts. In this instance, the alert could + link to a specific trip, or it could be more general and instead + link to its route or to the stops affected. + +The `EntitySelector` type described shows how service +alerts can be linked to routes, trips and stops accordingly. + +### Sample Feed + +The following extract is from the service alerts feed of TriMet in +Portland (). It contains a +single GTFS-realtime service alert *entity*. 
An entity contains either a +service alert, a vehicle position or a trip update. This service alert +indicates that a tree has fallen, causing potential delays to two +routes. + +**Note:** This extract has been converted from its binary format into a +human-readable version. *Outputting Human-Readable GTFS-realtime Feeds* +illustrates how this is achieved. + +``` +entity { + id: "35122" + + alert { + active_period { + start: 1415739180 + end: 1415786400 + } + + informed_entity { + route_id: "19" + route_type: 3 + } + + informed_entity { + route_id: "71" + route_type: 3 + } + + url { + translation { + text: "http://trimet.org/alerts/" + } + } + + description_text { + translation { + text: "Expect delays due to a tree down blocking northbound 52nd at Tolman. Police are flagging traffic thru using the southbound lane." + } + } + } +} +``` + +The elements of this service alert entity are as follows. + +* Active Period +* Informed Entity +* URL +* Description text + +Each of these fields are discussed below, as are some ways this +particular feed could be improved. + +### Active Period + +The active period element in this example states that the alert is +active between 12:53 PM on one day and 2:00 AM the following day +(Portland time). It is likely they included this finishing time to say +"this might last all day, but it definitely won't be a problem +tomorrow". + +Often precise timing isn't known. If the active period is omitted, then +the alert is assumed to be active for as long as it appears in the feed. + +### Informed Entity + +In a GTFS-realtime service alerts feed, *informed entity* refers to a +route, trip, stop, agency or route type, or any combination of these. + +In this example, there are two informed entities, both of which are bus +routes (as indicated by a route type of `3`). Referring to the TriMet +GTFS feed (), the routes +with an ID of `19` and `71` are as follows. 
+ +```csv +route_id,route_short_name,route_long_name,route_type +19,19,Woodstock/Glisan,3 +71,71,60th Ave/122nd Ave,3 +``` + +Technically in this case the `route_type` value need not be specified, +as this can be derived using the GTFS feed. Sometimes, however, an alert +may impact *all* routes for a given mode of transport, so the route type +would be specified only. + +For instance, if an electrical outage affects all subway trains, then an +informed entity containing only a route type of `1` (the GTFS value +for Subway routes) would be sufficient, rather than including a service +alert for each subway route. + +### URL + +This field contains a web address where additional information about the +alert can be found. + +In this particular example, TriMet has included a generic URL for their +service alert. Other alerts in the same feed also use the same URL. It +would be far more useful if instead each alert pointed to a URL +specifically related to that alert. This would make it easier to provide +the end user with additional information specific to that alert. + +### Description Text + +This field contains a textual description of the alert that can be +presented to the users of your web site or app. Just like the URL field, +it has a type of `TranslatedString`, which is the mechanism by which +GTFS-realtime can provide translations in multiple languages if +required. + +### Improvements + +In addition to providing a more specific URL, this alert could be +improved by including values for the `cause` and `effect` fields. +For instance, `cause` could have a value of `WEATHER` or +`ACCIDENT`, while `effect` could have a value of `DETOUR` or +`SIGNIFICANT_DELAYS`. + +Additionally, this alert could include the `header_text` field to +complement the `description_text` field. This would allow you to +display a summary of the alert (`header_text`), then supply more +information if the user of your app or web site requests it +(`description_text`). 
+ +### Specification + +This section contains the specification for the `Alert` entity type. +Some of this information has been sourced from the GTFS-realtime +reference page +(). + +### Alert + +An `Alert` message makes it possible to provide extensive information +about a given service alert, including the ability to match it to any +number of routes, stops or trips. + +The fields for any single service alert are as described in the +following table. + +| Field | Type | Frequency | Description | +| :---- | :--- | :-------- | :---------- | +| `active_period` | `TimeRange` | Zero or more occurrences | The time or times when this alert should be displayed to the user. If no times are specified, then the alert should be considered active as long as it appears in the feed. Sometimes there may be multiple active periods specified. For example, if some construction was occurring daily between a certain time, there might be an active period record for each day it will occur.| | | +| `informed_entity` | `EntitySelector` | Zero or more occurrences | These are the entities this service alert relates to (such as route, trips or stops). | | | +| `cause` | `Cause` | Optional | This is the event that occurred to trigger the alert. Possible values for the `Cause` enumerator are listed below this table. | | | +| `effect` | `Effect` | Optional | This indicates what action was taken as a result of the incident. Possible values for the `Effect` enumerator are listed below this table. | | | +| `url` | `TranslatedString` | Optional | A URL which provides additional information that can be shown to users. | | | +| `header_text` | `TranslatedString` | Optional | A brief summary of the alert that can be used as a heading. This is to be in plain-text (no HTML markup). | | | +| `description_text` | `TranslatedString` | Optional | A description of the alert that complements the header text. Similarly, it is to be in plain-text (no HTML markup). 
| | | + +The following values are valid for the `Cause` enumerator: + +* `ACCIDENT` +* `CONSTRUCTION` +* `DEMONSTRATION` +* `HOLIDAY` +* `MAINTENANCE` +* `MEDICAL_EMERGENCY` +* `POLICE_ACTIVITY` +* `STRIKE` +* `TECHNICAL_PROBLEM` +* `WEATHER` +* `UNKNOWN_CAUSE` +* `OTHER_CAUSE` + +The following values are valid for the `Effect` enumerator: + +* `ADDITIONAL_SERVICE` +* `NO_SERVICE` +* `SIGNIFICANT_DELAYS` +* `DETOUR` +* `STOP_MOVED` +* `MODIFIED_SERVICE` +* `REDUCED_SERVICE` +* `UNKNOWN_EFFECT` +* `OTHER_EFFECT` + +### TimeRange + +A `TimeRange` message specifies a time interval. It is not mandatory +to include both the start and finish times, but at least one of those is +required if a `TimeRange` is included. + +| Field | Type | Frequency | Description | +| :------ | :--- | :-------- | :---------- | +| `start` | `uint64` (64-bit unsigned integer) | Optional | The start time specified in number of seconds since 1-Jan-1970 00:00:00 UTC. | +| `end` | `uint64` (64-bit unsigned integer) | Optional | The end time specified in number of seconds since 1-Jan-1970 00:00:00 UTC. | + +If only the start time is specified, the time range is considered active +after the starting time. + +If only the end time is specified, the time range is considered active +before the end time. + +If both the start and finish times are specified, the time range is +considered active between these times. + +### EntitySelector + +An `EntitySelector` message is used to specify an entity from within a +GTFS feed. Doing so allows you to match up a service alert with a route +(or all routes of a given type), trip, stop or agency from the GTFS feed +that corresponds to the GTFS-realtime feed. + +| Field | Type | Frequency | Description | +| :---- | :--- | :-------- | :---------- | +| `agency_id` | `string` | Optional | This is the ID of an agency as it appears in the `agency.txt` file of the corresponding GTFS feed. 
| +| `route_id` | `string` | Optional | This is the ID of a route as it appears in the `routes.txt` file of the corresponding GTFS feed. | +| `route_type` | `int32` (32-bit signed integer) | Optional | This is a GTFS route type, such as `3` for bus routes or `4` for ferry routes. Extended GTFS route types can also be used for this value. | +| `trip` | `TripDescriptor` | Optional | This is used to match a specific trip from the corresponding GTFS feed's `trips.txt` file. Trip matching can be potentially more complex than just matching the `trip_id`, which is why this field differs to the other ID-related fields in `EntitySelector`. You can find more discussion of this below in the `TripDescriptor` section. | +| `stop_id` | `string` | Optional | This is the ID of a stop as it appears in the `stops.txt` file of the corresponding GTFS feed. If the corresponding stop is of type "station" (a `location_type` value of `1`), then you may consider matching this entity to its child stops also. | + +All of these elements are optional, but at least one of them must occur. +If multiple elements are specified, then all must be matched. + +***Note:** Conversely, if you want multiple matches, then you should +instead include multiple EntitySelector values. For instance, if you +want a service alert that covers all buses and ferries, then the +informed_entity field would contain one EntitySelector for buses, and +another for ferries.* + +The following table shows some different combinations that can occur, +and what each of them mean. + +| Fields Specified | Meaning | +| :----------------------- | :--------- | +| `agency_id` | The alert applies to anything relating to the given agency. This may include any routes or trips that match back to the agency, or even stops that the trips stop at. | +| `route_id` | The alert applies to the given route. For instance, if a user is viewing upcoming departures for the matched route, then it would be appropriate to display the alert. 
| +| `route_type` | The alert is relevant when showing the user any data related to the given route type. For example, if a route type of `3` (buses) is specified, then it would be appropriate to display the alert when a user is viewing upcoming departures for any bus route in the corresponding GTFS feed. | +| `trip` | If a trip is matched, then it would be appropriate to display the alert when the user is viewing anything related to that trip. For instance, if you are showing a list of stop times for the trip then it would be relevant. If you have received a real-time vehicle position for the trip and are showing it to the user on a map, you might show the service alert if the user taps on the vehicle. | +| `stop_id` | If a stop is matched here then it would be appropriate to show an alert in a number of situations, such as when viewing upcoming departures for the stop, or if the user is taking a trip that embarks or disembarks at the matched stop. | +| `agency_id` + `route_id` | This kind of match is redundant, because there should only ever be a maximum of one route that matches a given `route_id` value in a GTFS feed. | +| `route_id` + `trip` | Similar to the previous case, any matched trip will only belong to a single route, so specifying the `route_id` has no real meaning. | +| `route_id` + `stop_id` | Matching both a route and a stop can be useful if an alert relating to a stop only applies to certain routes. For instance, if a stop is serviced by two different routes and you want to notify users that one of the routes will no longer stop here, the alert does not apply to the route that will continue to service the stop. | +| `trip` + `stop_id` | In this case, a service alert is matched to a combination of a trip and a stop. The alert would be relevant to a user waiting at a stop for a particular vehicle. It would not apply to other people at the same stop waiting for a different route. 
| +| `route_type` + `stop_id` | Sometimes a stop is shared by multiple travel modes. For instance, some light rail services share stops with buses. This combination can be useful if a stop-related alert only applies to one of those modes. | + +As this demonstrates, it is possible to match service alerts to +real-world entities in any number of ways. This allows you to keep +relevant users informed. The alternative to matching on this granular +level would be to show all of your users all service alerts, meaning +most alerts would be irrelevant to most people. + +### TripDescriptor + +One of the files in a GTFS feed is `frequencies.txt`, which is used to +specify trips that repeat every *x* minutes. This file is used when an +agency does not have a specific schedule for trips, other than +guaranteeing, for instance, that a new trip departs every five minutes. + +For example, it is possible for a particular route to run every five +minutes for an entire day, while only having one entry in `trips.txt` +(and one set of corresponding stop times in `stop_times.txt`). + +When using trip frequencies the `trip_id` value may not be enough to +uniquely identify a single trip from the GTFS feed. This means that in +order to match a trip, additional information may need to be supplied, +which the `TripDescriptor` message allows for. + +| Field | Type | Frequency | Description | +| :---- | :--- | :-------- | :---------- | +| `trip_id` | `string` | Optional | This is the ID of a trip as it appears in the `trips.txt` of the corresponding GTFS feed. Alternatively, this value may refer to a trip that has been added via a `TripUpdate` message and does not exist in the GTFS feed. | +| `route_id` | `string` | Optional | If this value is specified, it should match the route ID for the trip specified in `trip_id`. If the `route_id` is specified but no `trip_id` is specified, then this trip descriptor references all trips for the given route. 
| +| `direction_id` | `uint32` (32-bit unsigned integer) | Optional | This value corresponds to the `direction_id` value as specified in the `trips.txt` file of the corresponding GTFS feed. At time of writing this is an experimental field in the GTFS-realtime specification. | +| `start_time` | `string` | Optional | If the specified trip in `trip_id` is a frequency-expanded trip, this value must be specified in order to determine which instance of a trip this selector refers to. Its value is in the format `HH:MM:SS`, as in the `stop_times.txt` and `frequencies.txt` files. | +| `start_date` | `string` | Optional | It is possible that knowing the `trip_id` may not be enough to determine a specific trip. For instance, if a train is scheduled to depart at 11:30 PM but is running 40 minutes late, then you would need to know its date in order to match up with the original trip (40 minutes late), and not the next day's instance of the trip (23 hours 20 minutes early). This field helps to avoid this ambiguity. The date is specified in `YYYYMMDD` format. | +| `schedule_relationship` | `ScheduleRelationship` | Optional | This value indicates the relationship between the trip(s) specified in this selector and its regular schedule. | + +The following values are valid for the `ScheduleRelationship` +enumerator: + +* `SCHEDULED`. Used when the trip being described is running in + accordance with a trip in the GTFS feed. +* `ADDED`. A trip that was added in addition to the schedule. For + instance, if an extra trip was added because there were more + passengers than normal, it would be represented using this value. +* `UNSCHEDULED`. A trip that is running with no schedule associated + with it. For instance, if this trip is expected to run but there is + no static schedule associated with it, it would be marked with this + value. +* `CANCELED`. A trip that existed in the schedule but was removed. 
+ For example, if a vehicle broke down and could not complete the + trip, then it would be marked as canceled. + +If a trip has been added, then the `route_id` should be populated, as +without this it may not be possible to determine which route the added +trip corresponds to (since the `trip_id` value would not appear in the +GTFS `trips.txt` file). + +With the newly-added `direction_id` field (still experimental at time +of writing this book), an added trip can also have its direction +specified, meaning you can present information to your users about which +direction the vehicle is traveling, even if you do not know its specific +stops. + +### TranslatedString + +A `TranslatedString` message contains one or more `Translation` +elements. This allows for alerts to be issued in multiple languages. A +`Translation` element is structured as follows. + +| Field | Type | Frequency | Description | +| :---- | :--- | :-------- | :---------- | +| `text` | `string` | Optional | A UTF-8 string containing the message. This string will typically be read by the users of your web site or app. | +| `language` | `string` | Optional | This is the language code for the given text (such as `en-US` for United States English). It can be omitted, but if there are multiple translations then at most only one translation can have this value omitted. | + diff --git a/gtfs-realtime-book/ch-03-vehicle-positions.md b/gtfs-realtime-book/ch-03-vehicle-positions.md new file mode 100644 index 0000000..f247ae7 --- /dev/null +++ b/gtfs-realtime-book/ch-03-vehicle-positions.md @@ -0,0 +1,317 @@ +## 3. Introduction to Vehicle Positions + +A vehicle position message communicates the physical location of a bus, +train, ferry or otherwise. In addition to location of the vehicle, it +can also provide information about the vehicle's speed, bearing (the +direction it is facing), and how to match up the vehicle with a trip in +the static schedule. 
+ +A recent addition to the GTFS-realtime specification (experimental at +time of writing -- see +) is the +ability to indicate how full a vehicle is. Although this element is not +yet formally a part of the specification, it has been included in this +book so it aligns with current documentation. + +### Sample Feed + +The following extract is from the vehicle position feed of MBTA in +Boston (). MBTA also provide separate +feeds for service alerts and trip updates. + +This extract contains a single GTFS-realtime entity, which represents a +single vehicle position. + +``` +entity { + id: "v1211" + + vehicle { + trip { + trip_id: "25906883" + start_date: "20150117" + schedule_relationship: SCHEDULED + route_id: "28" + } + + position { + latitude: 42.267967 + longitude: -71.093834 + } + + current_stop_sequence: 35 + timestamp: 1421565564 + stop_id: "1721" + + vehicle { + id: "y2189" + label: "2189" + } + } +} +``` + +***Note:** This extract has been converted from its binary format into a +human-readable version. *Outputting Human-Readable GTFS-realtime Feeds* +shows you how this is achieved.* + +Rendering the vehicle position and its path on a map along with the +referenced stop looks as follows. + +![Vehicle Position](images/vehicle-position.png) + +The elements of the vehicle position are described below. The outer +`vehicle` element in this entity is of type `VehiclePosition`. The +inner `vehicle` element is a `VehicleDescriptor`, which is described +shortly. + +### Trip + +If specified, this element is used to link the vehicle position to a +specific trip in the corresponding GTFS feed, or to a trip that has been +added to the schedule. Using MBTA's GTFS feed, you can determine that +the trip can be matched to the record below. You can find this in the +trips.txt file at +. 
+ +| `route_id` | `service_id` | `trip_id` | `trip_headsign` | +| :--------- | :----------------------------- | :-------- | :-------------- | +| 28 | BUSS12015-hbs15no6-Saturday-02 | 25906883 | Mattapan Station via Dudley Station | + +**Note:** If the schedule_relationship value was ADDED or UNSCHEDULED, +there would not have been a corresponding record in trips.txt. + +If you then look up the trip's records in stop_times.txt, you can +determine the trip begins at 25:45:00. This means the trip begins at +1:45 AM on the morning following its service date. In this case, the +start date in the vehicle position is specified as January 17. This +means that this trip actually takes place on the morning of January 18. +If the start date was not included with the vehicle position, it may +have been difficult to determine the specific trip being referenced. + +### Position + +This element contains the geographic location of the vehicle. In this +instance, only the latitude and longitude are specified. It is also +possible to include the vehicle's bearing, odometer and speed, however +only the latitude and longitude are required. + +### Current Stop + +A vehicle position can include information about its position relative +to its current or next stop. + +The status of the stop is indicated by the `current_status` value. In +this example, the `current_status` is not specified, which means the +vehicle is currently in transit to the stop (in other words, it is not +stopped there, nor is it about to stop). + +The stop referred to in this instance has a `stop_id` of `1721`. +Referring once again to the MBTA GTFS feed +(), this stop is as +follows. + +| `stop_id` | `stop_code` | `stop_name` | `stop_lat` | `stop_lon` | +| :-------- | :---------- | :------------------------ | :--------- | :--------- | +| 1721 | 1721 | Blue Hill Ave @ River St | 42.267151 | -71.09362 | + +The other value used to identify the upcoming or current stop is the +`current_stop_sequence` value. 
This refers to the `stop_sequence` +value in `stop_times.txt`. + +***Note:** Technically, you can infer the stop based on the trip and +current_stop_sequence value, so you do not strictly need the stop_id +value. However, in some cases it may not be possible to identify the +trip (and therefore not be able to infer the specific stop), so having +the `stop_id` value available in the vehicle position is useful.* + +By looking up the stop ID and trip ID in `stop_times.txt`, you can +locate the following entry: + +| `trip_id` | `stop_sequence` | `stop_id` | `arrival_time` | `departure_time` | +| :-------- | :-------------- | :-------- | :------------- | :--------------- | +| 25906883 | 35 | 1721 | 26:14:00 | 26:14:00 | + +***Note:** Remember that an hour value of 26 corresponds to 2 AM on the +following day (in this instance, the trip value specifies the trip's +date as January 17, so this stop time is 2 AM on January 18).* + +In plain English, this can be interpreted as "the vehicle is currently +in transit to stop 1721, scheduled to arrive at 2:14 AM." Note however, +that the timestamp value corresponds to 2:19 AM, meaning the bus is +about 5 minutes late. + +Tip: You can quickly find the human-readable version of a timestamp +using the command-line tool date. You may need to set the local timezone +first. In this instance, Boston's timezone can be set using export +TZ=America/New_York. You can then use date -r 1421565564 to find the +value of Sun 18 Jan 2015 02:19:24 EST. + +### Vehicle Descriptor + +The vehicle descriptor provides information to identify the specific +vehicle. In this example, the internal vehicle identifier is `y2189`. +It should remain consistent for this particular vehicle across the +system. Any subsequent vehicle positions or trip updates that refer to +this vehicle should use the same identifier. + +The vehicle ID value is not intended to be presented to end-users. +Instead, the label field should be used. 
The label could refer to a +particular train number, or perhaps a number painted on the side of a +bus. In the case of vehicle 2189, the number appears as in the following +photograph. + +![MBTA Bus](images/mbta_bus.png) + +The other piece of identifying information that can be presented to +users is the license plate of the vehicle. The MBTA's feed doesn't +specify this value, presumably because it duplicates the `label` +value. + +### Improvements + +Although this sample vehicle position contains the most pertinent +information (the coordinates of the vehicle and its corresponding trip), +knowing the direction that the vehicle is facing can also be useful. + +A common way of presenting vehicle positions on a map is to show all +positions for a given route on a map. If you can provide this extra +piece of information, a passenger can look at a map of all vehicle +positions for a given route and determine which are traveling in their +desired direction, and which are traveling in the opposite direction. + +***Note:** When you can match up a vehicle position to a specific trip, +it may be possible to filter which vehicles appear on the map using the +`direction_id` value for the trip. In some instances though, you may only +know the route of a vehicle and not its specific trip.* + +*Chapter 7. Consuming Vehicle Positions* shows you how to +determine the bearing of a vehicle if it is not included by the data +provider. + +### Specification + +This section contains the specification for the `VehiclePosition` +entity type. Some of this information has been sourced from the +GTFS-realtime reference page +(). + +### VehiclePosition + +A `VehiclePosition` element is used to specify the geographic position +and other attributes of a single vehicle, as well as providing +information to match that vehicle back to the corresponding GTFS feed. 
+ +| Field | Type | Frequency | Description | +| :---- | :--- | :-------- | :---------- | +| `trip` | `TripDescriptor` | Optional | This is used to match the vehicle position to a specific trip from `trips.txt` in the corresponding GTFS feed. | +| `vehicle` | `VehicleDescriptor` | Optional | This element provides information that can be used to identify a particular vehicle. | +| `position` | `Position` | Optional | The vehicle's geographic location, bearing and speed are specified using the `Position` type. | +| `current_stop_sequence` | `int32` (32-bit signed integer) | Optional | The sequence of the current stop, as it appears in the `stop_sequence` value for the trip matched in the corresponding `stop_times.txt` file. | +| `stop_id` | `string` | Optional | This is used to identify the current stop. If specified, the `stop_id` value must correspond to an entry in the `stops.txt` file of the corresponding GTFS feed. | +| `current_status` | `VehicleStopStatus` | Optional | If the current stop is specified (using `current_stop_sequence`), this value specifies what the "current stop" means. If this value isn't specified, it is assumed to be `IN_TRANSIT_TO`. | +| `timestamp` | `uint64` (64 bit unsigned integer) | Optional | This value refers to the moment at which the vehicle's position was measured, specified in number of seconds since 1-Jan-1970 00:00:00 UTC. | +| `congestion_level` | `CongestionLevel` | Optional | This value indicates the status of the traffic flow the vehicle is currently experiencing. The possible values for this element are listed below this table. | +| `occupancy_status` | `OccupancyStatus` | Optional | At time of writing, this field is experimental only. If specified, it indicates how full a given vehicle is. | + +The possible values for the `VehicleStopStatus` enumerator are as +follows: + +| Value | Description | +| :---- | :---------- | +| `INCOMING_AT` | The vehicle is just about to arrive at the specified stop. 
In some vehicles, there is a visual display or audio announcement when approaching the next stop. This could correspond with `current_status` changing from `IN_TRANSIT_TO` to `INCOMING_AT`. | +| `STOPPED_AT` | The vehicle is currently stationary at the stop. Once it departs the `current_status` would update to `IN_TRANSIT_TO`. | +| `IN_TRANSIT_TO` | The vehicle has departed the previous stop and is on its way to the specified stop. This is the default value if `current_status` is not specified. | + +The possible values for the `CongestionLevel` enumerator are as +follows: + +| Value | Description | +| :---- | :---------- | +| `UNKNOWN_CONGESTION_LEVEL` | If the congestion level is not specified, then this is the default value. | +| `RUNNING_SMOOTHLY` | Traffic is flowing smoothly. | +| `STOP_AND_GO` | Traffic is flowing, but not smoothly. | +| `CONGESTION` | The vehicle is experiencing some level of congestion, and therefore likely to be moving very slowly. | +| `SEVERE_CONGESTION` | The vehicle is experiencing a high level of congestion, and therefore likely to be not moving. | + +While this information can be useful to present to the user, it does not +allow you to make any inference as to whether the vehicle will adhere to +its schedule. Schedules are often designed to account for levels of +congestion, depending on the time of day. + +For this value to be useful in telling a user why their vehicle may be +late, the GTFS `stop_times.txt` would likely also need a field to +indicate the expected congestion level for any given stop time. +Realistically though, this is where the `TripUpdate` element comes +into play. This is covered in *Chapter 4. Introduction to Trip Updates*. + +### TripDescriptor + +The meaning of the trip descriptor differs slightly for a vehicle +position than for a service alert. In a service alert, if the +`route_id` is specified but the `trip_id` is not, then the service +alert applies to all trips for that route. 
+ +In the case of a vehicle position, if the `route_id` is specified but +not the `trip_id`, then it means the vehicle position corresponds to +"some" trip for that route, not "all" trips (it does not make sense for +it to apply to all trips). + +This means that if a user wants to know the vehicle positions for a +given route, you can show them all known positions, even if you are +unable to match the trip back to a trip in the corresponding GTFS feed. + +Refer to Chapter 2 for a description of all elements in a +`TripDescriptor`. + +### VehicleDescriptor + +This element is used to identify a specific vehicle, both internally and +for passengers. Every single vehicle in the system must have its own +identifier, and it should carry across all vehicle positions and trip +updates that correspond to the specific vehicle. + +| Field | Type | Frequency | Description | +| :---- | :--- | :-------- | :---------- | +| `id` | `string` | Optional | A unique identifier for a vehicle. This value is not intended to be shown to passengers, but rather for identifying the vehicle internally. | +| `label` | `string` | Optional | A label that identifies the vehicle to passengers. Unlike the `id` value, this value may be repeated for multiple vehicles, and it may change for a given vehicle over the course of a trip or series of trips. This might correspond to a route number that is displayed on a bus, or a particular train number, or some other identifier that passengers can see. | +| `license_plate` | `string` | Optional | The license plate of the vehicle. | + +### Position + +This element specifies the geographic position of a vehicle, as well as +related attributes such as bearing and speed. + +| Field | Type | Frequency | Description | +| :---- | :--- | :-------- | :---------- | +| `latitude` | `float` | Required | The latitude of the vehicle (a number in the range of `-90` to `90`). 
| +| `longitude` | `float` | Required | The longitude of the vehicle (a number in the range of `-180` to `180`). | +| `bearing` | `float` | Optional | Degrees, clockwise from True North. 0 is North, 90 is East, 180 is South, 270 is West. This can be either the direction the vehicle is facing, or the direction towards the next stop (GTFS-realtime does not provide a mechanism to determine which). | +| `odometer` | `double` | Optional | A measure of distance in meters. The GTFS-realtime specification does not state exactly what this value should represent. It could represent either the total number of meters the vehicle has ever travelled, or the number of meters travelled since the beginning of its current trip. | +| `speed` | `float` | Optional | The speed of the vehicle at the time of the reading, in meters per second. | + +While the latitude and longitude are the most important pieces of +information in this element, the vehicle's bearing can also be useful +to know. *Determining a Vehicle's Bearing* shows you how to +determine the bearing if it is not specified. + +### OccupancyStatus + +***Warning:** At time of writing the OccupancyStatus enumerator is +considered experimental only.* + +This enumerator is used for indicating how full a vehicle is. This can +be useful for warning passengers waiting for this vehicle that they may +not be able to fit and should instead attempt to use a different +vehicle. + +| Value | Description | +| :---- | :---------- | +| `EMPTY` | Used to indicate there are no (or very few) passengers on board. | +| `MANY_SEATS_AVAILABLE` | The vehicle is not empty, but it has many seats available. | +| `FEW_SEATS_AVAILABLE` | The vehicle has some seats available and is still accepting passengers. | +| `STANDING_ROOM_AVAILABLE` | The vehicle is still accepting passengers, but they will have to stand. | +| `CRUSHED_STANDING_ROOM_ONLY` | The vehicle is still accepting passengers, but they will have to stand and there is very limited space. 
| +| `FULL` | The vehicle is considered full but may still be accepting new passengers | +| `NOT_ACCEPTING_PASSENGERS` | The vehicle is not accepting new passengers. | + diff --git a/gtfs-realtime-book/ch-04-trip-updates.md b/gtfs-realtime-book/ch-04-trip-updates.md new file mode 100644 index 0000000..3269fb4 --- /dev/null +++ b/gtfs-realtime-book/ch-04-trip-updates.md @@ -0,0 +1,217 @@ +## 4. Introduction to Trip Updates + +A trip update message is used to report the progress of a vehicle along +its trip. Each trip may only have one trip update message in a +GTFS-realtime feed. + +A trip update can report that a trip has been canceled, or it can update +the progress of any number of stops on the trip. For example, a trip +update may contain an arrival estimate only for the vehicle's next +stop, or it may contain estimates for every remaining stop on the trip. + +If a trip does not have a trip update message, this should be +interpreted as there being no real-time information available; not that +it is necessarily progressing as scheduled. + +### Sample Feed + +The following extract is from the MBTA trip update feed +(). MBTA also provide separate feeds +for service alerts and vehicle positions. + +This extract contains a single GTFS-realtime entity, which represents a +bus that is four minutes behind schedule (a `delay` value of `240` +seconds). + +``` +entity { + id: "25732950" + + trip_update { + trip { + trip_id: "25732950" + start_date: "20150120" + schedule_relationship: SCHEDULED + route_id: "08" + } + + stop_time_update { + stop_sequence: 43 + arrival { + delay: 240 + } + stop_id: "135" + } + + vehicle { + id: "y2189" + label: "2189" + } + } +} +``` + +***Note:** This extract has been converted from its binary format into a +human-readable version. *Outputting Human-Readable GTFS-realtime Feeds* +shows you how this is achieved.* + +The elements of a `trip_update` entity are as follows. 
+ +### Trip + +This element is used to identify the particular trip that a trip update +applies to. In this instance, the trip has an ID of `25732950`, +running on the service day of 20 January 2015. + +![Boston Trip](images/boston-trip.png) + +***Note:** Although this trip may no longer be active, you can view +similar trips at .* + +Since the `schedule_relationship` value is `SCHEDULED`, this trip +corresponds to a trip in the MBTA GTFS file +(). + +If the `schedule_relationship` value is `ADDED`, then this +corresponds to a new trip for the route with an ID of `08`. The +`stop_time_update` field would likely then contain an entry for each +stop on the added trip. + +***Note:** The trip could also be marked as CANCELED. If so, the trip +could either be in the GTFS feed or it may have been added through a +previous trip update message.* + +### Stop Time Update + +The `stop_time_update` element contains information specific to a +stop on the trip. It is repeated for each stop that there is information +for. If the trip has been canceled (indicated by a +`schedule_relationship` value of `CANCELED`) then there will be no +`stop_time_update` elements. + +In the above sample, there is a single update, corresponding to the stop +with an ID of `135`. You can look up the details of this stop at +. This stop has a +stop sequence of 43, as shown in the following figure. + +![Map](images/stoptime_map.png) + +Referring to the `stop_times.txt` file in the GTFS feed, the scheduled +arrival time for this trip at stop 135 is 6:12 PM. The delay value +indicates that it will be four minutes late (240 seconds), meaning it +will now arrive at 6:16 PM. + +![Remaining Stops](images/remaining_stops.png) + +As there are no additional stop time updates for subsequent stops, it +can be assumed that this delay carries through to the rest of the trip. +There are eight remaining stops after this one, so all of those will +also be four minutes late. 
+ +### Vehicle + +The vehicle information is useful as it enables you to identify specific +vehicles. In this instance, MBTA use the same identifier both as their +internal identifier and also as the identifier printed on the bus. This +sample once again refers to bus `2189`. The photograph in *Chapter 3. Introduction to Vehicle Positions* +shows how this number appears on the vehicles. + +### Specification + +This section contains the specification for the `TripUpdate` +entity type. Some of this information has been sourced from the +GTFS-realtime reference page +(). + +### TripUpdate + +| Field | Type | Frequency | Description | +| :---- | :--- | :-------- | :---------- | +| `trip` | `TripDescriptor` | Optional | This element is used to match the referenced trip to the `trips.txt` file from the corresponding GTFS feed. | +| `vehicle` | `VehicleDescriptor` | Optional | This element provides information that can be used to identify a particular vehicle. | +| `stop_time_update` | `StopTimeUpdate` | Repeated | This element contains one or more instances of `StopTimeUpdate`. Each occurrence represents a prediction for a single stop. They must be in order of their stop sequence. | +| `timestamp` | `uint64` (64-bit unsigned integer) | Optional | This value refers to the moment at which the real-time progress was measured, specified in number of seconds since 1-Jan-1970 00:00:00 UTC. | +| `delay` | `int32` (32-bit signed integer) | Optional | This value is only experimental at time of writing. It is used to indicate the number of seconds the vehicle is either early (negative number) or late (positive number). Estimates specified within `StopTimeUpdate` elements take precedence over this value. | + +### TripDescriptor + +Identifying a trip in a trip update is slightly different to identifying +a trip in a service alert or vehicle position message. 
With vehicle +positions and service alerts, the trip descriptor may refer to an +arbitrary trip for a given route, but to do so with trip updates does +not make sense. + +With trip updates, you must be able to identify a specific trip from the +corresponding GTFS feed. This is because trip updates will often only +include an update for a single stop, and you must therefore determine +subsequent stop times for a given trip so those can be adjusted +accordingly. To do so, you need to be able to find a specific trip and +its corresponding stop times in the GTFS feed. + +This differs to a service alert where you can apply an alert to all +trips for a given route, rather than one at a specific time. It also +differs to vehicle positions, where being able to see all positions for +a route on a map is useful, even if you do not know the specific trip +each position corresponds to. + +### VehicleDescriptor + +This element is used to identify a specific vehicle, both internally and +for passengers. Every single vehicle in the system must have its own +identifier, and it should carry across all vehicle positions and trip +updates that correspond to the specific vehicle. + +| Field | Type | Frequency | Description | +| :---- | :--- | :-------- | :---------- | +| `id` | `string` | Optional | A unique identifier for a vehicle. This value is not intended to be shown to passengers, but rather for identifying the vehicle internally. | +| `label` | `string` | Optional | A label that identifies the vehicle to passengers. Unlike the `id` value, this value may be repeated for multiple vehicles, and it may change for a given vehicle over the course of a trip or series of trips. This might correspond to a route number that is displayed on a bus, or a particular train number, or some other identifier that passengers can see. | +| `license_plate` | `string` | Optional | The license plate of the vehicle. 
| + +### StopTimeUpdate + +| Field | Type | Frequency | Description | +| :---- | :--- | :-------- | :---------- | +| `stop_sequence` | `uint32` (32-bit unsigned integer) | Optional | In GTFS feeds, the order of stops in a trip is indicated by the `stop_sequence` value in `stop_times.txt`. If specified, the value specified in the `StopTimeUpdate` must match the value from the GTFS feed. It is possible for a single trip to make multiple visits to a single stop (for example, if it's a loop service), so this value is important. | +| `stop_id` | `string` | Optional | This value corresponds to a single stop from the associated GTFS feed. Using this value and the `stop_sequence` value, it is possible to pinpoint a specific record from `stop_times.txt` that this `StopTimeUpdate` element alters. | +| `arrival` | `StopTimeEvent` | Optional | Specifies the updated arrival time. If the `schedule_relationship` is `SCHEDULED`, then this field and/or `departure` must be specified. | +| `departure` | `StopTimeEvent` | Optional | Specifies the updated departure time. If the `schedule_relationship` is `SCHEDULED`, then this field and/or `arrival` must be specified. | +| `schedule_relationship` | `ScheduleRelationship` | Optional | If no value is specified, this defaults to `SCHEDULED`. Other possible values and their meanings are as described below. | + +Valid values for the `ScheduleRelationship` enumerator are: + +| Value | Description | +| :---- | :---------- | +| `SCHEDULED` | Indicates this stop occurs in accordance with the scheduled trip, although the arrival or departure times may be different from the times listed in the GTFS `stop_times.txt` file. | +| `SKIPPED` | Indicates that the corresponding stop will be skipped for the given trip. The arrival or departure times may still be included, but the vehicle will not be stopping. | +| `NO_DATA` | This is the value that should be used if no real-time information is available for this stop. 
In this case, neither `arrival` nor `departure` should be specified (if they are, you can safely ignore them). | + +### StopTimeEvent + +| Field | Type | Frequency | Description | +| :---- | :--- | :-------- | :---------- | +| `delay` | `int32` (32-bit signed integer) | Optional | The number of seconds that a vehicle is early (a negative value) or late (a positive value). A value of `0` indicates the vehicle is exactly on time. | +| `time` | `int64` (64-bit signed integer) | Optional | The time of the arrival or departure, specified in number of seconds since 1-Jan-1970 00:00:00 UTC. | +| `uncertainty` | `int32` (32-bit signed integer) | Optional | Represents the level of uncertainty attached to this prediction in seconds. A value of `0` means it is completely certain, while an omitted value means an unknown level of uncertainty. | + +Either the delay or exact time must be specified. If both are specified, +then the scheduled time in GTFS added to the delay should equal the +`time` value. If it does not, just the `time` value can be used. + +Conversely, if the `delay` value is not specified, you can calculate +it by subtracting the GTFS scheduled time from the predicted `time` +value. + +***Note:** Your interpretation of what constitutes a delay is likely to +depend on how you are presenting real-time data. For instance, if you +present arrivals to your users as "Early", "On-Time" and "Late", it is +likely to be more useful to your users to indicate a 30-second delay as +being "On-Time" rather than "Late".* + +The `uncertainty` field is used to indicate the accuracy of the +prediction. For example, consider a prediction that indicates a bus will +be five minutes late. If the transit agency thinks the prediction is +within a minute on either side of five minutes (say, 4-6 minutes late), +then the uncertainty value is the difference between the minimum and +maximum value. In this example, the uncertainty is 2 minutes -- a value +of `120` seconds. 
+ diff --git a/gtfs-realtime-book/ch-05-protocol-buffers.md b/gtfs-realtime-book/ch-05-protocol-buffers.md new file mode 100644 index 0000000..2059a63 --- /dev/null +++ b/gtfs-realtime-book/ch-05-protocol-buffers.md @@ -0,0 +1,379 @@ +## 5. Protocol Buffers + +The previous chapters have included extracts from GTFS-realtime feeds in +a human-readable format. This data is actually represented using a data +format called *Protocol Buffers*. + +Developed by Google and initially released in 2008, Protocol Buffers are +a way of serializing structured data into a format which is intended to +be smaller and faster than XML. + +Note: Remember, if you are writing a transit-related mobile app, +GTFS-realtime feeds are not intended to be consumed directly by mobile +devices due to the large amount of data transferred. Rather, you will +need an intermediate server to read the feed from the provider then +serve only relevant data to the mobile devices running your app. + +Even though it looks similar to JSON data, the human-readable version of +a protocol buffer is not intended to be manually parsed. Instead, data +is extracted from a protocol buffer using native language (such as Java, +C++ or Python). + +Note: Although the Protocol Buffers application can generate code in +Java, C++ or Python, all code examples in this book will be in Java. + +For example, assume you have written a Java program that reads and +parses a GTFS-realtime service alerts feed (shown later in this chapter, +and in the next chapter). 
+ +In order to consume a GTFS-realtime feed provided by a transit agency +such as TriMet or MBTA, your workflow would look similar to the +following diagram: + +![GTFS-realtime Consumption](images/GTFS-realtime-consumption.png) + +When a transit agency or data provider want to publish a GTFS-realtime +feed, their process would be similar, except instead of reading the feed +every 15 seconds, they would write a new protocol buffer data file every +15 seconds using data received from their vehicles. + +***Note:** *Chapter 11. Publishing GTFS-realtime Feeds* will +show you how to create a GTFS-realtime feed using Protocol Buffers. In +order to do so, you will need to install Protocol Buffers as +demonstrated in this chapter.* + +### Installing Protocol Buffers + +In order to generate code to read or write GTFS-realtime feeds in your +native language, you first need to install Protocol Buffers. Once +installed, it is capable of generating code for Java, C++ or Python. + +This section shows you how to download and build the `protoc` +command-line tool on a UNIX or Linux-based system. These instructions +were derived from installing Protocol Buffers on Mac OS X 10.10. + +First, download and extract the Protocol Buffers source code. At the +time of writing, the current version is 2.6.1. + +``` +$ curl -L \ + https://github.com/google/protobuf/releases/download/v2.6.1/protobuf-2.6.1.tar.gz \ + -o protobuf-2.6.1.tar.gz + +$ tar -zxf protobuf-2.6.1.tar.gz + +$ cd protobuf-2.6.1 +``` + +***Note:** Visit and +click the "Download" link to find the latest version. The following +instructions should still work for subsequent versions.* + +Next, compile the source files using `make`. First run the +`configure` script to build the `Makefile`, then run `make`. 
+ +``` +$ ./configure && make +``` + +***Note:** Separating the commands by && means that make will only run if +`./configure` exits successfully.* + +Once compilation is complete, you can verify the build by running **make +check**. You can then install it globally on your system using **make +install**. If you do not want to install it globally, you can run +`protoc` directly from the **./src** directory instead. + +``` +$ make check + +$ make install +``` + +Next verify that it has been successfully built and installed by running +the `protoc` command. The output should be "Missing input file." + +``` +$ protoc + +Missing input file. +``` + +The next section will show you how to generate Java files using +`protoc` and the `gtfs-realtime.proto` file. + +### Introduction to gtfs-realtime.proto + +In order to generate source code files that can read a protocol buffer, +you need a `.proto` input file. Typically you won't need to create or +modify `.proto` files yourself, but it is useful to have a basic +understanding of how they work. + +A `.proto` file contains a series of instructions that defines the +structure of the data. In the case of GTFS-realtime, there is a file +called `gtfs-realtime.proto` which contains the structure for each of +the messages available (service alerts, vehicle positions and trip +updates). + +The following is an extract from `gtfs-realtime.proto` for the +`VehiclePosition` message. + +**Note:** The `TripDescriptor` and `VehicleDescriptor` types +referenced in this extract also have declarations, which are not +included here.
+ +``` +message VehiclePosition { + optional TripDescriptor trip = 1; + optional Position position = 2; + optional uint32 current_stop_sequence = 3; + + enum VehicleStopStatus { + INCOMING_AT = 0; + STOPPED_AT = 1; + IN_TRANSIT_TO = 2; + } + + optional VehicleStopStatus current_status = 4 [default = IN_TRANSIT_TO]; + optional uint64 timestamp = 5; + + enum CongestionLevel { + UNKNOWN_CONGESTION_LEVEL = 0; + RUNNING_SMOOTHLY = 1; + STOP_AND_GO = 2; + CONGESTION = 3; + SEVERE_CONGESTION = 4; + } + + optional CongestionLevel congestion_level = 6; + optional string stop_id = 7; + optional VehicleDescriptor vehicle = 8; + extensions 1000 to 1999; +} +``` + +Ignoring the numerical values assigned to each field (they aren't +likely to be relevant because you never directly refer to them), you can +see how the structure is the same as the specification covered earlier +in this book for vehicle positions. + +Each field in a protocol buffer has a unique value assigned to it. This +value is used internally when encoding or decoding each field in +GTFS-realtime feed. If there is additional data to be represented in a +feed, the values between 1000 and 1999 are reserved for extensions. +*Chapter 10. GTFS-realtime Extensions* shows how extensions in +GTFS-realtime work. + +### Compiling gtfs-realtime.proto + +The next step towards consuming a GTFS-realtime feed is to compile the +`gtfs-realtime.proto` file into Java code using `protoc`. In order +to do this, you must have already created a Java project ahead of time. +`protoc` will generate the files and incorporate them directly into +your project's source tree. + +These instructions assume you have created your Java project in +**/path/to/gtfsrt** and that you will download the +`gtfs-realtime.proto` file to **/path/to/protobuf**. + +First, download the `gtfs-realtime.proto` file. 
+ +``` +$ cd /path/to/protobuf + +$ curl \ + https://developers.google.com/transit/gtfs-realtime/gtfs-realtime.proto \ + -o gtfs-realtime.proto +``` + +***Note:** If this URL is no longer current when you read this, you can +find the updated location at +.* + +In order to use `protoc`, you must specify the **--proto_path** +argument as the directory in which `gtfs-realtime.proto` resides. +Additionally, you must specify the full path to the +`gtfs-realtime.proto` file. + +Typically, in a Java project your source files will reside in a +directory called `src` within the project directory. This directory +must be specified in the **--java_out** argument. + +The full command to run is as follows: + +``` +$ protoc \ + --proto_path=/path/to/protobuf \ + --java_out=/path/to/gtfsrt/src \ + /path/to/protobuf/gtfs-realtime.proto +``` + +If this command runs successfully there will be no output to screen, but +there will be newly-created files in your source tree. There should now +be a **./com/google** directory in your source tree, and a package +called `com.google.transit.realtime`. + +### Adding the Protocol Buffers Library + +Before you can use this new package, you must add the Protocol Buffers +library to your Java project. You can either compile these files into a +Java archive (using the instructions in +**./protobuf-2.6.1/java/README.txt**), or you can add the Java +source files directly to your project as follows: + +``` +$ cd protobuf-2.6.1 + +$ cp -R ./java/src/main/java/com/google/protobuf \ + /path/to/gtfsrt/src/com/google/ +``` + +If you now try to build your project, an error will occur due to a +missing package called `DescriptorProtos`.
You can add this to your +project using the following command: + +``` +$ cd protobuf-2.6.1 + +$ protoc --java_out=/path/to/gtfsrt/src \ + --proto_path=./src \ + ./src/google/protobuf/descriptor.proto +``` + +Your project should now build successfully, meaning you can use the +`com.google.transit.realtime` package to read data from a +GTFS-realtime feed. + +### Reading Data From a GTFS-realtime Feed + +To read the data from a GTFS-realtime feed, you need to build a +`FeedMessage` object. The simplest way to do this is by opening an +`InputStream` for the URL of the GTFS-realtime feed. + +The following code builds a `FeedMessage` for the vehicle positions +feed of the MBTA in Boston. + +***Note:** To simplify the code listings in this book, package imports +are not included. All classes used are either standard Java classes, or +classes generated by Protocol Buffers.* + +```java +public class YourClass { + public void loadFeed() throws IOException { + + URL url = new URL("http://developer.mbta.com/lib/gtrtfs/Vehicles.pb"); + + InputStream is = url.openStream(); + + FeedMessage fm = FeedMessage.parseFrom(is); + + is.close(); + + // ... + } +} +``` + +A `FeedMessage` object contains zero or more entities, each of which +is either a service alert, a vehicle position, or a trip update. You can +retrieve a list of entities using **getEntityList()**, then loop over +them as follows: + +```java +public class YourClass { + public void loadFeed() throws IOException { + + // ... + + for (FeedEntity entity : fm.getEntityList()) { + // Process the entity here + } + + // ... + + } +} +``` + +Since many of the fields in GTFS-realtime are optional, you need to +check for the presence of the field you want to use before trying to +retrieve it. This is achieved using `hasFieldName()`. You can +then retrieve it using `getFieldName()`. + +In the case of `FeedEntity`, you need to check which of the +GTFS-realtime messages are available. 
For instance, to check if the +entity contains a service alert, you would call `entity.hasAlert()`. +If the call to `hasAlert()` returns `true`, you can retrieve it +using `entity.getAlert()`. + +The following code shows how to access the various entities. + +```java +public class YourClass { + public void loadFeed() throws IOException { + // ... + + for (FeedEntity entity : fm.getEntityList()) { + + if (entity.hasAlert()) { + Alert alert = entity.getAlert(); + + // Process the alert here + } + + if (entity.hasVehicle()) { + VehiclePosition vp = entity.getVehicle(); + + // Process the vehicle position here + } + + if (entity.hasTripUpdate()) { + TripUpdate tu = entity.getTripUpdate(); + + // Process the trip update here + } + } + + // ... + + } +} +``` + +The next three chapters will show you how to process the `Alert`, `VehiclePosition` and `TripUpdate` objects. + +### Outputting Human-Readable GTFS-realtime Feeds + +Earlier chapters included human-readable extracts from GTFS-realtime +feeds. Although plain-text GTFS-realtime feeds are not designed to be +parsed directly, they can be useful for quickly determining the kinds of +data available within a feed. + +All objects in a GTFS-realtime feed can be output using the +`TextFormat` class in the `protobuf` package. Passing the object to +**printToString()** will generate a human-readable version of the +GTFS-realtime element. + +For instance, you can output an entire feed as follows: + +```java +FeedMessage fm = ...; + +String output = TextFormat.printToString(fm); + +System.out.println(output); +``` + +Or you can output individual entities: + +```java +for (FeedEntity entity : fm.getEntityList()) { + String output = TextFormat.printToString(entity); + + System.out.println(output); +} +``` + +Alternatively, you can call the `toString()` method on these objects +to generate the same output. 
+ diff --git a/gtfs-realtime-book/ch-06-consuming-service-alerts.md b/gtfs-realtime-book/ch-06-consuming-service-alerts.md new file mode 100644 index 0000000..9869173 --- /dev/null +++ b/gtfs-realtime-book/ch-06-consuming-service-alerts.md @@ -0,0 +1,305 @@ +## 6. Consuming Service Alerts + +The previous chapter introduced you to Protocol Buffers and showed you +how to load a remote GTFS-realtime feed into your Java project. This +chapter will show you how to read the data from each of the three entity +types (service alerts, vehicle positions and trip updates). + +The previous chapter also showed you how to loop over all entities in a +feed using `getEntityList()`. Each entity contains either a service +alert, a vehicle position or a trip update. + +Once you have verified that a `FeedEntity` element contains an alert, +you can retrieve the corresponding `Alert` object using +`getAlert()`. + +```java +for (FeedEntity entity : fm.getEntityList()) { + if (entity.hasAlert()) { + Alert alert = entity.getAlert(); + + processAlert(alert); + } +} +``` + +You can then access the specific properties of a service alert using the +returned object. + +### Cause & Effect + +For example, to retrieve the cause value for the alert, you would first +check for its presence with `hasCause()` then retrieve the value using +`getCause()`. + +```java +public void processAlert(Alert alert) { + if (alert.hasCause()) { + Cause cause = alert.getCause(); + + // ... + } + + // ... +} +``` + +The `Cause` object is an enumerator, meaning it has a finite number of +possible values. To determine which value the object corresponds to, +call `getNumber()` to compare it to the possible values. + +```java +switch (cause.getNumber()) { + case Cause.ACCIDENT_VALUE: + // ... + + case Cause.MEDICAL_EMERGENCY_VALUE: + // ...
+} +``` + +Note: There are other possible cause values to include in the switch +statement; these have been omitted here as they are all covered in the +specification earlier in this book. + +The `Effect` field works in the same way. The difference is that the +possible list of values to compare against is different. + +```java +if (alert.hasEffect()) { + Effect effect = alert.getEffect(); + + switch (effect.getNumber()) { + case Effect.DETOUR_VALUE: + // ... + + case Effect.SIGNIFICANT_DELAYS_VALUE: + // ... + } +} +``` + +### Title, Description and URL + +Each of these fields are of type `TranslatedString`. A +`TranslatedString` may contain multiple `Translation` objects, so +when processing these fields you must loop over the available +translations. + +For example, to loop over the available translations for the header text +you iterate over **getTranslationList()**. + +```java +if (alert.hasHeaderText()) { + TranslatedString header = alert.getHeaderText(); + + for (Translation translation : header.getTranslationList()) { + // Process the translation here + + } +} +``` + +***Note:** To access the description you would use `hasDescription()` and +`getDescription()`, while to access the URL you would use `hasUrl()` and +`getUrl()`.* + +Alternatively, you can use `getTranslationCount()` and +`getTranslation()` to retrieve each of the available translations. + +```java +for (int i = 0; i < header.getTranslationCount(); i++) { + Translation translation = header.getTranslation(i); + + // Process the translation here +} +``` + +A `Translation` object is made up of text and optionally, its +associated language. When dealing with the URL field, the text retrieved +from `getText()` contains a full URL. 
+ +```java +if (translation.hasLanguage()) { + String language = translation.getLanguage(); + + if (language.equals("fr")) { + // Do something for French language + } + else { + // All other languages + } +} + +if (translation.hasText()) { + String text = translation.getText(); + + // Do something with the text +} +``` + +***Note:** Most GTFS-realtime feeds only specify text in a single +language, and therefore do not include the language value.* + +### Active Period + +A service alert may contain zero or more time ranges, each of which +specify the dates and times the alert is active for. If none are +specified then the alert is active as long as it exists within the feed. + +You can access each of the `TimeRange` objects using either of the +following methods: + +```java +for (TimeRange timeRange : alert.getActivePeriodList()) { + // ... +} + +for (int i = 0; i < alert.getActivePeriodCount(); i++) { + TimeRange timeRange = alert.getActivePeriod(i); + + // ... +} +``` + +A `TimeRange` can have either a start or finish date, or it may +contain both. In Java, you can turn each of these dates into a +`java.util.Date` object as shown below. + +```java +if (timeRange.hasStart()) { + Date start = new Date(timeRange.getStart() * 1000); + + // ... +} + +if (timeRange.hasEnd()) { + Date end = new Date(timeRange.getEnd() * 1000); + + // ... +} +``` + +***Note:** The date value is multiplied by 1,000 because the date in the +GTFS-realtime is represented by the number of seconds since January 1, +1970, while `java.util.Date` is instantiated using the number of +milliseconds since the same date.* + +### Affected Entities + +A service alert may contain zero or more affected entities, each of +which describes a route, stop, agency, trip or route type. 
You can +access these entities using either of the following methods: + +``` +for (EntitySelector entity : alert.getInformedEntityList()) { + +} + +for (int i = 0; i < alert.getInformedEntityCount(); i++) { + EntitySelector entity = alert.getInformedEntity(i); + +} +``` + +There are a number of properties available in the `EntitySelector` +object, each of which can be used to match the entity to the +corresponding GTFS feed. + +For example, if the `EntitySelector` object has a route ID value, then +you should be able to locate the route in the corresponding GTFS feed's +`routes.txt` file. + +The properties can be accessed as follows: + +```java +if (entity.hasAgencyId()) { + String agencyId = entity.getAgencyId(); + +} + +if (entity.hasRouteId()) { + String routeId = entity.getRouteId(); + +} + +if (entity.hasRouteType()) { + int routeType = entity.getRouteType(); + +} + +if (entity.hasStopId()) { + String stopId = entity.getStopId(); + +} +``` + +The route type value is an Integer and if present must correspond either +to the standard GTFS route type values, or to the extended route type +values. + +The other entity information that can be contained in `EntitySelector` +is trip information. You can access the trip properties as follows: + +```java +if (entity.hasTrip()) { + TripDescriptor trip = entity.getTrip(); + + if (trip.hasTripId()) { + String tripId = trip.getTripId(); + + } + + if (trip.hasRouteId()) { + String routeId = trip.getRouteId(); + + } + + if (trip.hasStartDate()) { + String startDate = trip.getStartDate(); + + } + + if (trip.hasStartTime()) { + String startTime = trip.getStartTime(); + + } + + if (trip.hasScheduleRelationship()) { + ScheduleRelationship sr = trip.getScheduleRelationship(); + + } +} +``` + +You can test the `ScheduleRelationship` value by comparing the +`getNumber()` value to one of the available constants, as follows. + +```java +if (entity.hasTrip()) { + // ... 
+ + if (trip.hasScheduleRelationship()) { + + ScheduleRelationship sr = trip.getScheduleRelationship(); + + switch (sr.getNumber()) { + case ScheduleRelationship.ADDED_VALUE: + // ... + break; + + case ScheduleRelationship.CANCELED_VALUE: + // ... + break; + + case ScheduleRelationship.SCHEDULED_VALUE: + // ... + break; + + case ScheduleRelationship.UNSCHEDULED_VALUE: + // ... + break; + } + } +} +``` diff --git a/gtfs-realtime-book/ch-07-consuming-vehicle-positions.md b/gtfs-realtime-book/ch-07-consuming-vehicle-positions.md new file mode 100644 index 0000000..1b745d0 --- /dev/null +++ b/gtfs-realtime-book/ch-07-consuming-vehicle-positions.md @@ -0,0 +1,509 @@ +## 7. Consuming Vehicle Positions + +Just like when consuming service alerts, you can loop over the +`FeedEntity` objects returned from `getEntityList()` to process +vehicle positions. If an entity contains a vehicle position, you can +retrieve it using the `getVehicle()` method. + +```java +for (FeedEntity entity : fm.getEntityList()) { + if (entity.hasVehicle()) { + VehiclePosition vp = entity.getVehicle(); + processVehiclePosition(vp); + } +} +``` + +You can then process the returned `VehiclePosition` object to extract +the details of the vehicle position. + +### Timestamp + +One of the provided values is a timestamp reading of when the vehicle +position reading was taken. + +```java +if (vp.hasTimestamp()) { + Date timestamp = new Date(vp.getTimestamp() * 1000); + +} +``` + +***Note:** The value is multiplied by 1,000 because the java.util.Date +class accepts milliseconds, whereas GTFS-realtime uses whole seconds.* + +This value is useful because the age of a reading can dictate how the +data is interpreted. For example, if your latest reading was only thirty +seconds earlier, your users would realize it is very recent and +therefore is probably quite accurate.
On the other hand, if the latest +reading was ten minutes earlier, they would see it had not updated +recently and may therefore not be completely accurate. + +The other way this value is useful is for determining whether to store +this new vehicle position. If your previous reading for the same vehicle +has the same timestamp, you can ignore this update, as nothing has +changed. + +### Geographic Location + +A `VehiclePosition` object contains a `Position` object, which +contains a vehicle's latitude and longitude, and may also include other +useful information such as its bearing and speed. + +```java +public static void processVehiclePosition(VehiclePosition vp) { + if (vp.hasPosition()) { + Position position = vp.getPosition(); + + if (position.hasLatitude() && position.hasLongitude()) { + float latitude = position.getLatitude(); + float longitude = position.getLongitude(); + + // ... + } + + // ... + } +} +``` + +Even though the `Position` element of a `VehiclePosition` is +required (according to the specification), checking for it explicitly +means you can handle its omission gracefully. Remember: when consuming +GTFS-realtime data you are likely to be relying on a third-party data +provider who may or may not follow the specification correctly. + +Likewise, the latitude and longitude are also required, but it is still +prudent to ensure they are included. These values are treated as any +floating-point numbers, so technically they may not be valid geographic +coordinates. + +A basic check to ensure the coordinates are valid is to ensure the +latitude is between -90 and 90 and the longitude is between -180 and +180. 
+ +```java +float latitude = position.getLatitude(); +float longitude = position.getLongitude(); + +if (Math.abs(latitude) <= 90 && Math.abs(longitude) <= 180) { + // Valid coordinate + +} +else { + // Invalid coordinate + +} +``` + +A more advanced check would be to determine a bounding box of the data +provider's entire public transportation network from the corresponding +GTFS feed's `stops.txt`. You would then check that all received +coordinates are within or near the bounding box. + +***Note:** The important lesson to take from this is that a GTFS-realtime +feed may appear to adhere to the specification, but you should still +perform your own sanity checks on received data.* + +In addition to the latitude and longitude, you can also retrieve a +vehicle's speed, bearing and odometer reading. + +```java +if (position.hasBearing()) { + float bearing = position.getBearing(); + + // Degrees from 0-359. 0 is North, 90 is East, 180 is South, 270 is West + +} + +if (position.hasOdometer()) { + double odometer = position.getOdometer(); + + // Meters +} + +if (position.hasSpeed()) { + float speed = position.getSpeed(); + + // Meters per second +} +``` + +### Trip Information + +In order to associate a vehicle position with a particular trip from the +corresponding GTFS feed, vehicle positions may include a trip +descriptor. + +***Note:** The trip descriptor is declared as optional in the +GTFS-realtime specification. Realistically, it will be hard to provide +value to end-users without knowing which trip the position corresponds +to. At the very least, you would need to know the route (which can be +specified via the trip descriptor).* + +Just like with service alerts (covered in the previous chapter), there +are a number of values you can retrieve from a trip descriptor to +determine which trip a vehicle position belongs to. 
The following +listing demonstrates how to access this data: + +```java +if (vp.hasTrip()) { + TripDescriptor trip = vp.getTrip(); + + if (trip.hasTripId()) { + String tripId = trip.getTripId(); + + } + + if (trip.hasRouteId()) { + String routeId = trip.getRouteId(); + + } + + if (trip.hasStartDate()) { + String startDate = trip.getStartDate(); + + } + + if (trip.hasStartTime()) { + String startTime = trip.getStartTime(); + + } + + if (trip.hasScheduleRelationship()) { + ScheduleRelationship sr = trip.getScheduleRelationship(); + + } +} +``` + +### Vehicle Identifiers + +There are a number of values available in a vehicle position entity by +which to identify a vehicle. You can access an internal identifier (not +for public display), a label (such as a vehicle number painted on to a +vehicle), or a license plate, as shown in the following listing: + +```java +if (vp.hasVehicle()) { + VehicleDescriptor vehicle = vp.getVehicle(); + + if (vehicle.hasId()) { + String id = vehicle.getId(); + + } + + if (vehicle.hasLabel()) { + String label = vehicle.getLabel(); + + } + + if (vehicle.hasLicensePlate()) { + String licensePlate = vehicle.getLicensePlate(); + + } +} +``` + +The vehicle descriptor and the values contained within are all optional. +In the case where this information is not available, you can use the +trip descriptor provided with each vehicle position to match up vehicle +positions across multiple updates. + +Being able to match up the trip and/or vehicle reliably over subsequent +updates allows you to reliably track the ongoing position changes for a +particular vehicle. For instance, if you wanted to animate the vehicle +moving on a map as new positions were received, you would need to know +that each update corresponds to a particular vehicle. + +### Current Stop + +Each vehicle position record can be associated with a single stop. If +specified, this stop must appear in the corresponding GTFS feed.
+ +The stop can be identified either by the `stop_id` value, or by using +the `current_stop_sequence` value. If you use the stop sequence, the +stop can be determined by finding the corresponding record in the GTFS +feed's `stop_times.txt` file. + +```java +if (vp.hasStopId()) { + String stopId = vp.getStopId(); + +} + +if (vp.hasCurrentStopSequence()) { + int sequence = vp.getCurrentStopSequence(); + +} +``` + +***Note:** It is possible for the same stop to be visited multiple times +in a single trip (consider a loop service, although there are other +instances when this may also happen). The stop sequence value can be +useful to disambiguate this case.* + +On its own, knowing the stop has no meaning without context. The +`current_status` field provides this context, indicating that the +vehicle is either: + +* In transit to the stop (it is the next stop but the vehicle is not yet nearby) +* About to arrive at the stop +* Currently stopped at the stop. + +According to the GTFS-realtime specification, you can only make use of +`current_status` if the stop sequence is specified. + +The following code shows how you can retrieve and check the value of the +stop status. + +```java +if (vp.hasCurrentStopSequence()) { + int sequence = vp.getCurrentStopSequence(); + + if (vp.hasCurrentStatus()) { + VehicleStopStatus status = vp.getCurrentStatus(); + + switch (status.getNumber()) { + case VehicleStopStatus.IN_TRANSIT_TO_VALUE: + // ... + + case VehicleStopStatus.INCOMING_AT_VALUE: + // ... + + case VehicleStopStatus.STOPPED_AT_VALUE: + // ... + + } + } +} +``` + +### Congestion Levels + +The other two values that may be included with a vehicle position relate +to the congestion inside and outside of the vehicle. + +The `congestion_level` value indicates the flow of traffic. This value does +not indicate whether or not the vehicle is running to schedule, since congestion +levels are typically accounted for in scheduling. 
+ +The following code shows how you can check the congestion level value: + +```java +if (vp.hasCongestionLevel()) { + CongestionLevel congestion = vp.getCongestionLevel(); + + switch (congestion.getNumber()) { + case CongestionLevel.UNKNOWN_CONGESTION_LEVEL_VALUE: + // ... + + case CongestionLevel.RUNNING_SMOOTHLY_VALUE: + // ... + + case CongestionLevel.STOP_AND_GO_VALUE: + // ... + + case CongestionLevel.SEVERE_CONGESTION_VALUE: + // ... + + case CongestionLevel.CONGESTION_VALUE: + // ... + + } +} +``` + +The `occupancy_status` value indicates how full the vehicle currently +is. This can be useful to present to your users so they know what to +expect before the vehicle arrives. For instance: + +* A person with a broken leg may not want to travel on a standing + room-only bus +* Someone traveling late at night might prefer a taxi over an empty + train for safety reasons +* If a bus is full and not accepting passengers, someone may stay at + home for longer until a bus with seats is coming by. + +You can check the occupancy status of a vehicle as follows: + +```java +if (vp.hasOccupancyStatus()) { + OccupancyStatus status = vp.getOccupancyStatus(); + + switch (status.getNumber()) { + case OccupancyStatus.EMPTY_VALUE: + // ... + + case OccupancyStatus.MANY_SEATS_AVAILABLE_VALUE: + // ... + + case OccupancyStatus.FEW_SEATS_AVAILABLE_VALUE: + // ... + + case OccupancyStatus.STANDING_ROOM_ONLY_VALUE: + // ... + + case OccupancyStatus.CRUSHED_STANDING_ROOM_ONLY_VALUE: + // ... + + case OccupancyStatus.FULL_VALUE: + // ... + + case OccupancyStatus.NOT_ACCEPTING_PASSENGERS_VALUE: + // ... + + } +} +``` + +### Determining a Vehicle's Bearing + +One of the values that can be specified in a GTFS-realtime vehicle +position update is the bearing of the vehicle being reported. This value +indicates either the direction the vehicle is facing, or the direction +towards the next stop. 
+ +Ideally, the bearing contains the actual direction that the vehicle is +facing, not the direction to the next stop, since it is possible for +this value to be inaccurate. For example, if a Northbound vehicle is +stopped at a stop (that is, directly beside it), then the calculated +bearing would indicate the vehicle was facing East, not North. + +***Note:** This example assumes that the vehicle is in a country that +drives on the right-hand side of the road.* + +There are several ways to calculate a vehicle's bearing if it is not +specified in a GTFS-realtime feed: + +* Determine the direction towards the next stop +* Determine the direction using a previous vehicle position reading +* A combination of the above. + +***Note:** The GTFS-realtime specification states that feed providers +should not include the bearing if it is calculated using previous +positions. This is because consumers of the feed can calculate this, as +shown in the remainder of this chapter.* + +### Bearing to Next Stop + +In order to determine the bearing from the current location to the next +stop, there are two values you need: + +* **Vehicle position.** This is provided in the `latitude` and + `longitude` fields of the vehicle position. +* **Position of the next stop.** This is provided by the `stop_id` + or `current_stop_sequence` fields of the vehicle position. + +***Note:** Many GTFS-realtime vehicle position feeds do not include +information about the next stop, and consequently this technique will +not work in those instances. If this is the case, using the previous +reading to determine the bearing would be used, as shown later in +*Bearing From Previous Position*.* + +The following formula is used to determine the bearing between the +starting location and the next stop: + +``` +θ = atan2( sin Δλ ⋅ cos φ2 , cos φ1 ⋅ sin φ2 − sin φ1 ⋅ cos φ2 ⋅ cos Δλ ) +``` + +In this equation, the starting point is indicated by *1*, while the next +stop is represented by *2*. 
Latitude is represented by *φ*, while +longitude is represented by *λ*. For example, *φ2* means the latitude of +the next stop. + +The resultant value *θ* is the bearing between the two points in +*radians*, and must then be converted to degrees (0-360). + +This equation can be represented in Java as follows. It accepts the +latitude and longitude of two points in degrees, and returns the bearing +in degrees. + +```java +public static double calculateBearing(double lat1Deg, double lon1Deg, double lat2Deg, double lon2Deg) { + + // Convert all degrees to radians + double lat1 = Math.toRadians(lat1Deg); + double lon1 = Math.toRadians(lon1Deg); + double lat2 = Math.toRadians(lat2Deg); + double lon2 = Math.toRadians(lon2Deg); + + // sin Δλ ⋅ cos φ2 + double y = Math.sin(lon2 - lon1) * Math.cos(lat2); + + // cos φ1 ⋅ sin φ2 − sin φ1 ⋅ cos φ2 ⋅ cos Δλ + double x = Math.cos(lat1) * Math.sin(lat2) - Math.sin(lat1) * Math.cos(lat2) * Math.cos(lon2 - lon1); + + // Calculate the bearing in radians + double bearingRad = Math.atan2(y, x); + + // Convert radians to degrees + double bearingDeg = Math.toDegrees(bearingRad); + + // Ensure the bearing is in the range 0 <= bearing < 360 + if (bearingDeg < 0) { + bearingDeg += 360; + } + + return bearingDeg; +} +``` + +One thing to be aware of when using the next stop to determine bearing +is the `current_status` value of the vehicle position (if specified). +If the value is `STOPPED_AT`, then the calculated angle might be +significantly wrong, since the vehicle is likely directly next to the +stop. In this instance, you should either use the subsequent stop, or +determine the bearing from the previous position reading. + +### Bearing From Previous Position + +Similar to calculating a vehicle's bearing using the direction to the +next stop, you can also use a previous reading to calculate the +vehicle's direction.
+ +The following diagram shows two readings for the same vehicle, as well +as the calculated bearing from the first point to the second. + +![Bearing From Previous Position](images/bearing-from-position.png) + +To calculate the bearing in this way, you need the following data: + +* **Current vehicle position.** This is provided in the `latitude` + and `longitude` fields of the vehicle position. +* **Previous vehicle position.** This should be the most recent + vehicle position recorded prior to receiving the current vehicle + position. Additionally, this position must represent a different + location to the current position. If a vehicle is stationary for + several minutes, you may receive several locations for the vehicle + with the same coordinates. + +***Note:** In the case of multiple readings at the same location, you +should use a minimum distance threshold to decide whether or not the +location is the same. For instance, you may decide that the two +locations must be more than, say, 10 meters apart to be used for comparison.* + +It is also necessary to check the age of the previous reading. If the +previous reading is more than a minute or two old, it is likely that the +vehicle has travelled far enough to render the calculated bearing +meaningless. + +### Combination + +These two strategies are useful to approximate a vehicle's bearing if +it is not specified in a vehicle position message, but they are still +reliant on certain data being available. + +The first technique needs to know the next stop, while the second needs +a previous vehicle position reading. + +Your algorithm to determine a vehicle's bearing could be as follows: + +* Use the provided bearing value in the vehicle position if it + is available. +* Otherwise, if you have a recent previous reading, calculate the + direction using that and the current reading. +* Otherwise, if the next stop is known, show the bearing of the + vehicle towards the stop.
+ diff --git a/gtfs-realtime-book/ch-08-consuming-trip-updates.md b/gtfs-realtime-book/ch-08-consuming-trip-updates.md new file mode 100644 index 0000000..867e30f --- /dev/null +++ b/gtfs-realtime-book/ch-08-consuming-trip-updates.md @@ -0,0 +1,380 @@ +## 8. Consuming Trip Updates + +Of the three message types in GTFS-realtime, trip updates are the most +complex. A single trip update can contain a large quantity of data and +is used to transform the underlying schedule. Trips can be modified in a +number of ways: trips can be canceled, stops can be skipped, and arrival +times can be updated. + +This chapter will show you how to consume trip updates and will discuss +real-world scenarios and how they can be represented using trip updates. + +Similar to service alerts and vehicle positions, you can loop over the +`FeedEntity` objects from `getEntityList()` to access and then +process `TripUpdate` objects. + +```java +for (FeedEntity entity : fm.getEntityList()) { + if (entity.hasTripUpdate()) { + TripUpdate tripUpdate = entity.getTripUpdate(); + + processTripUpdate(tripUpdate); + } +} +``` + +### Timestamp + +Just like with vehicle position updates, trip updates include a +timestamp value. This indicates when the real-time data was updated, +including any subsequent arrival/departure estimates contained within +the trip update. + +You can read this value into a native `java.util.Date` object as +follows: + +```java +if (tripUpdate.hasTimestamp()) { + Date timestamp = new Date(tripUpdate.getTimestamp() * 1000); + +} +``` + +**Note:** The value is multiplied by 1,000 because the java.util.Date +class accepts milliseconds, whereas GTFS-realtime uses whole seconds. + +Two of the ways you can use the timestamp value are: + +* When telling your users arrival estimates, you can also show the + timestamp so they know when the estimate was made. A newer estimate + (e.g. one minute old) is likely to be more reliable than an older + one (e.g. ten minutes old). 
+* You can also use the timestamp to decide whether or not to keep the + estimate. For instance, if for some reason a feed did not refresh in + a timely manner and all of the estimates were hours old, you could + simply skip over them as they would no longer provide meaningful + information. + +### Trip Information + +Each trip update contains necessary data so the included estimates can +be linked to a trip in the corresponding GTFS feed. + +There are three ways estimate data can be provided in a trip update: + +* The trip update contains estimates for all stops. +* The trip update contains estimates for some stops. +* The trip has been canceled, so there is no stop information. + +If the trip has been added (that is, it is not a part of the schedule in +the GTFS feed), then the trip descriptor has no use in resolving the +trip back to the GTFS feed. + +However, if a trip update only contains updates for some of the stops, +you must be able to find the entire trip in the GTFS feed so you can +propagate the arrival delay to subsequent stops. + +The following code shows how to extract values such as the GTFS trip ID +and route ID from the `TripDescriptor` object. + +```java +if (tripUpdate.hasTrip()) { + TripDescriptor trip = tripUpdate.getTrip(); + + if (trip.hasTripId()) { + String tripId = trip.getTripId(); + + // ... + } + + if (trip.hasRouteId()) { + String routeId = trip.getRouteId(); + + // ... + } + + if (trip.hasStartDate()) { + String startDate = trip.getStartDate(); + + // ... + } + + if (trip.hasStartTime()) { + String startTime = trip.getStartTime(); + + // ... + } + + if (trip.hasScheduleRelationship()) { + ScheduleRelationship sr = trip.getScheduleRelationship(); + + // ... + } +} +``` + +### Trip Delay + +Although only an experimental part of the GTFS-realtime specification at +the time of writing, a trip update can also contain a delay value. 
A +positive number indicates the number of seconds the vehicle is late, +while a negative value indicates the number of seconds early. A value of +`0` means the vehicle is on-time. + +This value can be used for any stop along the trip that does not +otherwise have an associated `StopTimeEvent` element. + +```java +if (tripUpdate.hasDelay()) { + int delay = tripUpdate.getDelay(); + + if (delay == 0) { + // on time + + } + else if (delay < 0) { + // early + + } + else if (delay > 0) { + // late + + } +} +``` + +### Vehicle Identifiers + +The `VehicleDescriptor` object contained in a trip update provides a +number of values by which to identify a vehicle. You can access an +internal identifier (not for public display), a label (such as a vehicle +number painted on to a vehicle), or a license plate. + +The following code shows how to access these values: + +```java +if (tripUpdate.hasVehicle()) { + VehicleDescriptor vehicle = tripUpdate.getVehicle(); + + if (vehicle.hasId()) { + String id = vehicle.getId(); + + } + + if (vehicle.hasLabel()) { + String label = vehicle.getLabel(); + + } + + if (vehicle.hasLicensePlate()) { + String licensePlate = vehicle.getLicensePlate(); + + } +} +``` + +The vehicle descriptor and the values contained within are all optional. +In the case where this information is not available, you can use the +trip descriptor information provided with each vehicle position to match +up vehicle positions across multiple updates. + +### Stop Time Updates + +Each trip update contains a number of stop time updates, each of which +is a `StopTimeUpdate` object. You can access each of the +`StopTimeUpdate` objects by calling **getStopTimeUpdateList()**. + +```java +for (StopTimeUpdate stopTimeUpdate : tripUpdate.getStopTimeUpdateList()) { + // ... 
+} +``` + +Alternatively, you can loop over each `StopTimeUpdate` object as +follows: + +```java +for (int i = 0; i < tripUpdate.getStopTimeUpdateCount(); i++) { + StopTimeUpdate stopTimeUpdate = tripUpdate.getStopTimeUpdate(i); + + // ... +} +``` + +Each `StopTimeUpdate` object contains a schedule relationship value +(using the `ScheduleRelationship` class, which is different to that +contained in `TripDescriptor` objects). + +The schedule relationship dictates how to use the rest of the data in +the stop time update, as well as which data will be present. If the +schedule relationship value is not present, the value is assumed to be +`SCHEDULED`. + +```java +ScheduleRelationship sr; + +if (stopTimeUpdate.hasScheduleRelationship()) { + sr = stopTimeUpdate.getScheduleRelationship(); + +} +else { + sr = ScheduleRelationship.SCHEDULED; + +} + +if (sr.getNumber() == ScheduleRelationship.SCHEDULED_VALUE) { + // An arrival and/or departure estimate is provided + +} +else if (sr.getNumber() == ScheduleRelationship.NO_DATA_VALUE) { + // No real-time data available in this update + +} +else if (sr.getNumber() == ScheduleRelationship.SKIPPED_VALUE) { + // The vehicle will not stop at this stop + +} +``` + +### Stop Information + +In order to determine which stop a stop time update corresponds to, +either the stop ID or stop sequence (or both) must be specified. You can +then look up the stop based on its entry in `stops.txt`, or determine +the stop based on the corresponding entry in `stop_times.txt`. + +```java +if (stopTimeUpdate.hasStopId()) { + String stopId = stopTimeUpdate.getStopId(); + +} + +if (stopTimeUpdate.hasStopSequence()) { + int sequence = stopTimeUpdate.getStopSequence(); + +} +``` + +### Arrival/Departure Estimates + +If the `ScheduleRelationship` value is `SCHEDULED`, there must be either +an arrival or a departure object specified. Both the arrival and +departure use the `StopTimeEvent` class, which can be accessed as +follows.
+ +The following code shows how to access these values: + +```java +if (sr.getNumber() == ScheduleRelationship.SCHEDULED_VALUE) { + if (stopTimeUpdate.hasArrival()) { + StopTimeEvent arrival = stopTimeUpdate.getArrival(); + + // Process the arrival + } + + if (stopTimeUpdate.hasDeparture()) { + StopTimeEvent departure = stopTimeUpdate.getDeparture(); + + // Process the departure + } +} +``` + +Each `StopTimeEvent` object contains either an absolute timestamp for +the arrival or departure, or it contains a delay value. The delay value +is relative to the scheduled time in the corresponding GTFS feed. + +```java +if (stopTimeUpdate.hasArrival()) { + StopTimeEvent arrival = stopTimeUpdate.getArrival(); + + if (arrival.hasDelay()) { + int delay = arrival.getDelay(); + + // ... + } + + if (arrival.hasTime()) { + Date time = new Date(arrival.getTime() * 1000); + + // ... + } + + if (arrival.hasUncertainty()) { + int uncertainty = arrival.getUncertainty(); + + // ... + } +} +``` + +### Trip Update Scenarios + +This chapter has shown you how to handle trip update information as it +appears in a GTFS-realtime feed. It is also important to understand the +intent of the data provider when reading these updates. Consider the +following scenarios that can occur frequently in large cities: + +1. **A train needs to skip one or more stops.** Perhaps the station has + been closed temporarily due to unforeseen circumstances. +2. **A train that was scheduled to bypass a station will now stop at + it.** Perhaps there is a temporary delay on the line so the train + will wait at a stop while the track is cleared. +3. **A bus is rerouted down a different street.** Perhaps there was a + car accident earlier and police have redirected traffic. +4. **A train will stop at a different platform.** For example, instead + of a train stopping at platform 5 at a given station, it will now + stop at platform 6. This happens frequently on large train networks + such as in Sydney. +5. 
**A bus trip is completely canceled.** Perhaps the bus has broken + down and there is no replacement vehicle. +6. **An unplanned trip is added.** Perhaps there is an unexpectedly + large number of passengers so extra buses are brought in to clear + the backlog. + +While each provider may represent these scenarios differently in +GTFS-realtime, it is likely each of them would be represented as +follows. + +1. **Modify the existing trip.** Include a `StopTimeUpdate` for each + stop that is to be canceled with a `ScheduleRelationship` value of + `SKIPPED`. +2. **Cancel the trip and add a new one.** Unfortunately, there is no + way in GTFS-realtime to insert a stop into an existing trip. The + `ScheduleRelationship` value for the trip would be set to + `CANCELED` (in the `TripDescriptor`, not in the + `StopTimeUpdate` elements). +3. If a bus is rerouted down a different street, how it is handled + depends on which stops are missed: + + * If no stops are to be made on the new street, cancel the stops + impacted by the detour, similar to scenario 1. + * If the bus will stop on the new street to drop passengers off or + pick them up, then cancel the trip and add a new one, similar to + scenario 2. + +4. **Cancel the trip and add a new one.** Since it is not possible to + insert a stop using GTFS-realtime, the existing trip must be + canceled and a new trip added to replace it if you want the platform + to be reflected accurately. Note, however, that many data providers + do not differentiate between platforms in their feeds, so they only + refer to the parent station instead. In this instance a platform + change should be communicated using service alerts if it could + otherwise cause confusion. +5. **Cancel the trip.** In this instance you would not need to include + any `StopTimeUpdate` elements for the trip; rather, you would + specify `CANCELED` in the `ScheduleRelationship` field of + `TripDescriptor`. +6. 
**Add a new trip.** When adding a new trip, all of the stops in the + trip should also be included (not just the next one). The + `ScheduleRelationship` for the trip would be set to `ADDED`. + +The important takeaway from this section is that if you are telling your +users that a trip has been canceled, you need to make it clear to them +if a new trip has replaced it, otherwise they may not correctly +understand the intent of the data. + +In these instances, hopefully the data provider also provides a +corresponding service alert so the reason for the change can be +communicated to passengers. + diff --git a/gtfs-realtime-book/ch-09-database-storage.md b/gtfs-realtime-book/ch-09-database-storage.md new file mode 100644 index 0000000..e9b42f2 --- /dev/null +++ b/gtfs-realtime-book/ch-09-database-storage.md @@ -0,0 +1,492 @@ +## 9. Storing Feed Data in a Database + +This chapter will demonstrate how to save data from a GTFS-realtime feed +into an SQLite database. In order to look up trips, stops, routes and +other data from the GTFS-realtime feed, this SQLite database will also +contain the data from the corresponding GTFS feed. + +To do this, the `GtfsToSql` and `GtfsRealTimeToSql` tools which have +been written for the purpose of this book and the preceding book, *The +Definitive Guide to GTFS* ([http://gtfsbook.com](http://gtfsbook.com/)), +will be used. + +GTFS and GTFS-realtime feeds from the MBTA in Boston +() will also be used. + +### Storing GTFS Data in an SQLite Database + +*The Definitive Guide to GTFS* demonstrated how to populate an SQLite +database using `GtfsToSql`. Here is an abbreviated version of the +steps required to do so. + +First, download `GtfsToSql` from +. This is a Java command-line +application to import a GTFS feed into an SQLite database. + +The pre-compiled `GtfsToSql` Java archive can be downloaded from its +GitHub repository at +. + +Next, download the MBTA GTFS feed, available from +. 
+ +``` +$ curl http://www.mbta.com/uploadedfiles/MBTA_GTFS.zip -o gtfs.zip + +$ unzip gtfs.zip -d mbta/ +``` + +To create an SQLite database from this feed, the following command can +be used: + +``` +$ java -jar GtfsToSql.jar -s jdbc:sqlite:./db.sqlite -g ./mbta -o +``` + +***Note:** The -o flag enables the recording of additional useful data in +the database. For instance, each entry in the trips table will contain +the departure and arrival time (which would otherwise only be available +by looking up the `stop_times` table).* + +This may take a minute or two to complete (you will see progress as it +imports the feed and then creates indexes), and at the end you will have +a GTFS database in a file called `db.sqlite`. You can then query this +database with the command-line `sqlite3` tool, as shown in the +following example: + +``` +$ sqlite3 db.sqlite + +sqlite> SELECT agency_id, agency_name, agency_url, agency_lang FROM agency; + +2|Massport|http://www.massport.com|EN + +1|MBTA|[http://www.mbta.com](http://www.mbta.com/)|EN +``` + +***Note:** For more information about storing and querying GTFS data, +please refer to *The Definitive Guide to GTFS*, available from +[http://gtfsbook.com](http://gtfsbook.com/).* + +### Storing GTFS-realtime Data in an SQLite Database + +Once the GTFS data has been imported into the SQLite database, you can +then start importing GTFS-realtime data. For this you can use the +`GtfsRealTimeToSql` tool specifically written to import data into an +SQLite database. + +***Note:** Technically you do not need the GTFS data present in the +database as well, but having it makes it far simpler to resolve trip, +route and stop data.* + +The GTFS data will change infrequently (perhaps every few weeks or +months), while the realtime data can change several times per minute. +`GtfsRealTimeToSql` will frequently update a feed, according to the +refresh time specified. 
Each time it updates, the data saved on the +previous iteration is deleted, as that data is no longer the most +up-to-date data. + +To get started, download `GtfsRealTimeToSql` from its GitHub +repository at . Just +like `GtfsToSql`, this is a Java command-line application. The +pre-compiled `GtfsRealTimeToSql` Java archive can be downloaded from +. + +Since the `db.sqlite` database contains the MBTA GTFS feed, you can +now run `GtfsRealTimeToSql` with one of MBTA's GTFS-realtime feeds. +For instance, their vehicle positions feed is located at +. + +``` +java -jar GtfsRealTimeToSql.jar \ + -u "http://developer.mbta.com/lib/gtrtfs/Vehicles.pb" \ + -s jdbc:sqlite:./db.sqlite \ + -refresh 15 +``` + +When you run this command, the vehicle positions feed will be retrieved +every 15 seconds (specified by the `-refresh` parameter), and the data +will be saved into the `db.sqlite` SQLite database. + +MBTA also have a trip updates feed and a service alerts feed. In order +to load each of these feeds you will need to run `GtfsRealTimeToSql` +three separate times. To allow it to run in the background, add the +`-d` parameter (to make it run as a server daemon), and background it +using **&**. + +To load the vehicle positions in the background, stop the previous +command, then run the following command instead. + +``` +java -jar GtfsRealTimeToSql.jar \ + -u "http://developer.mbta.com/lib/gtrtfs/Vehicles.pb" \ + -s jdbc:sqlite:./db.sqlite \ + -refresh 15 \ + -d & +``` + +Now you can also load the trip updates into the same `db.sqlite` file. +MBTA's trip updates feed is located at +. The following +command reloads the trip updates every 30 seconds: + +``` +java -jar GtfsRealTimeToSql.jar \ + -u "http://developer.mbta.com/lib/gtrtfs/Passages.pb" \ + -s jdbc:sqlite:./db.sqlite \ + -refresh 30 \ + -d & +``` + +***Note:** You may prefer more frequent updates (such as 15 seconds), or +even less frequent (such as 60 seconds). 
The more frequently you update, +the greater your server utilization will be.* + +Finally, to load the service alerts feed, use the same command, but now +with the feed located at +. Generally, +service alerts are updated infrequently by providers, so you can use a +refresh time such as five or ten minutes (300 or 600 seconds). + +``` +java -jar GtfsRealTimeToSql.jar \ + -u "http://developer.mbta.com/lib/gtrtfs/Alerts/Alerts.pb" \ + -s jdbc:sqlite:./db.sqlite \ + -refresh 300 \ + -d & +``` + +These three feeds will continue to be reloaded until you terminate their +respective processes. + +### Querying Vehicle Positions + +When using `GtfsRealTimeToSql`, vehicle positions are stored in a +table called `gtfs_rt_vehicles`. If you want to retrieve positions +for, say, the route with an ID of 742, you can run the following query: + +``` +$ sqlite3 db.sqlite + +sqlite> SELECT route_id, trip_id, trip_date, trip_time, trip_sr, latitude, longitude + FROM gtfs_rt_vehicles + WHERE route_id = 742; +``` + +This query returns the trip descriptor and GPS coordinates for all +vehicles on the given route. The following table shows sample results +from this query. + +| `route_id` | `trip_id` | `trip_date` | `trip_time` | `trip_sr` | `latitude` | `longitude` | +| :--------- | :--------- | :----------- | :---------- | :-------- | :--------- | :---------- | +| 742 | 25900860 | 20150315 | | 0 | 42.347309 | -71.040359 | +| 742 | | | | | 42.331505 | -71.065590 | + +***Note:** The trip_sr column corresponds to the trip's schedule +relationship value.* + +This particular snapshot of vehicle position data shows two different +trips for route 742, which corresponds to the SL2 Silver Line. + +``` +sqlite> SELECT route_short_name, route_long_name FROM routes WHERE +route_id = 742; + +SL2|Silver Line SL2 +``` + +The first five columns retrieved are the trip identifier fields, which +are used to link a vehicle position with a trip from the GTFS feed. 
The +first row is simple: the `trip_id` value can be used to look up the +row from the `trips` table. + +``` +sqlite> SELECT service_id, trip_headsign, direction_id, block_id + +FROM trips + +WHERE trip_id = '25900860'; +``` + +The results from this query are as follows. + +| `service_id` | `trip_headsign` | `direction_id` | `block_id` | +| :--------------------------- | :-------------- | :------------- | :--------- | +| BUSS12015-hbs15017-Sunday-02 | South Station | 1 | S742-61 | + +***Note:** To see all fields available in this or any other table +(including `gtfs_rt_vehicles`), use the .schema command in SQLite. For +instance, enter the command `.schema gtfs_rt_vehicles`.* + +One helpful thing MBTA does is to include the trip starting date when +they include the trip ID. This helps to disambiguate trips that may +start or finish around midnight or later. + +The second vehicle position is another matter. This record does not +include any trip descriptor information other than the route ID. This +means you cannot reliably link this vehicle position with a specific +trip from the GTFS feed. + +However, knowing the route ID and the vehicle's coordinates may be +enough to present useful information to the user: "A bus on the Silver +Line SL2 route is at this location." You cannot show them if the vehicle +is running late, early or on-time, but you can show the user where the +vehicle is. + +### Querying Trip Updates + +The `GtfsRealTimeToSql` tool stores trip update data in two tables: +one for the main trip update data (such as the trip descriptor), and +another to store each individual stop time event that belongs within +each update. 
+ +For example, to retrieve all trip updates for the route with ID 742 once +again, you could use the following query: + +```sql +SELECT route_id, trip_id, trip_date, trip_time, trip_sr, vehicle_id, vehicle_label + FROM gtfs_rt_trip_updates + WHERE route_id = '742'; +``` + +This query will return data similar to the following table: + +| `route_id` | `trip_id` | `trip_date` | `trip_time` | `trip_sr` | `vehicle_id` | `vehicle_label` | +| :--------- | :-------- | :---------- | :---------- | :-------- | :----------- | :-------------- | +| 742 | 25900860 | 20150315 | | 0 | y1106 | 1106 | +| 742 | 25900856 | 20150315 | | 0 | | | +| 742 | 25900858 | 20150315 | | 0 | | | + +Each of these returned records has corresponding records in the +`gtfs_rt_trip_updates_stoptimes` table that includes the +arrival/departure estimates for various stops on the trip. + +When using `GtfsRealTimeToSql`, an extra column called `update_id` +is included on both tables so they can be linked together. For example, +to retrieve all updates for the route ID 742, you can join the tables +together as follows: + +```sql +SELECT trip_id, arrival_time, arrival_delay, departure_time, departure_delay, stop_id, stop_sequence + FROM gtfs_rt_trip_updates_stoptimes + JOIN gtfs_rt_trip_updates USING (update_id) + WHERE route_id = '742'; +``` + +MBTA's trip updates feed only includes a stop time prediction for the +next stop. This means that each trip in the results only has one +corresponding record. You must then apply the delay to subsequent stop +times for the given trip. + +| `trip_id` | `arrival_time` | `arrival_delay` | `departure_time` | `departure_delay` | `stop_id` | `stop_sequence` | +| :-------- | :------------- | :-------------- | :--------------- | :---------------- | :-------- | :-------------- | +| 25900860 | | 30 | | | 74617 | 10 | +| 25900856 | | | | 0 | 74611 | 1 | +| 25900858 | | | | 0 | 31255 | 1 | + +There are several things to note in these results. 
Firstly, MBTA do not +provide a timestamp for `arrival_time` and `departure_time`; they +only provide the delay offsets that can be compared to the scheduled +time in the GTFS feed. + +Secondly, the second and third trips listed have not yet commenced. You +can deduce this just by looking at this table, since the next stop has +stop sequence of `1` (and therefore there are no stops before it). + +The first trip is delayed by 30 seconds. In other words, it will arrive +30 seconds later than it was scheduled. The data received from the +GTFS-realtime feed does not actually indicate the arrival timestamp, but +you can look up the corresponding stop time from the GTFS feed to +determine this. + +The following query demonstrates how to look up the arrival time: + +```sql +SELECT s.stop_name, st.arrival_time + FROM stop_times st, trips t, stops s + WHERE st.trip_index = t.trip_index + AND st.stop_index = s.stop_index + AND s.stop_id = '74617' + AND t.trip_id = '25900860' + AND st.stop_sequence = 10; +``` + +***Note:** The `GtfsToSql` tool used to import the GTFS feed adds fields +such as trip_index and stop_index in order to speed up data searching. +There is more discussion on the rationale of this in *The Definitive +Guide to GTFS*, available from +[http://gtfsbook.com](http://gtfsbook.com/).* + +This query joins the `stop_times` table to both the `trips` and +`stops` table in order to look up the corresponding arrival time. The +final three rows in the query contain the values returned above (the +`stop_id`, `trip_id` and the `stop_sequence`), to find the +following record: + +``` +South Station Silver Line - Inbound|21:17:00 +``` + +This means the scheduled arrival time for South Station Silver Line is +9:17:00 PM. Since the estimate indicates a delay of 30 seconds, the new +arrival time is 9:17:30 PM. 
+ +***Note:** Conversely, if the delay was -30 instead of 30, the vehicle +would be early and arrive at 9:16:30 PM.* + +One thing not touched upon in this example is the stop time's schedule +relationship. The `gtfs_rt_trip_updates_stoptimes` table also includes a +column called `rship`, which is used to indicate the schedule +relationship for the given stop. If this value is `SKIPPED` (a value of +`1`), then it means the vehicle will not stop here. + +### Determining Predictions Using Blocks + +In GTFS, the `block_id` value in **trips.txt** is used to indicate a +series of one or more trips undertaken by a single vehicle. In other +words, once it gets to the end of one trip, it starts a new trip from +that location. If a given trip is very short, a vehicle may perform up +to fifty or one hundred trips in a single day. + +This can be useful for determining estimates for future trips that may +not be included in the GTFS-realtime feed. For instance, if a trip is running 30 +minutes late, then it is highly likely that subsequent trips on that +block will also be running late. + +Referring back to the trip data returned in the above example, the +following data can be retrieved from the GTFS feed: + +```sql +SELECT trip_id, block_id, service_id, departure_time, arrival_time + FROM trips + WHERE trip_id IN ('25900860', '25900856', '25900858') + ORDER BY departure_time; +``` + +This returns the following data. + +| `trip_id` | `block_id` | `service_id` | `departure_time` | `arrival_time` | +| :-------- | :--------- | :--------------------------- | :--------------- | :------------- | +| 25900860 | S742-61 | BUSS12015-hbs15017-Sunday-02 | 21:04:00 | 21:17:00 | +| 25900856 | S742-61 | BUSS12015-hbs15017-Sunday-02 | 21:18:00 | 21:28:00 | +| 25900858 | S742-61 | BUSS12015-hbs15017-Sunday-02 | 21:35:00 | 21:48:00 | + +Each of these trips has the same values for `block_id` and +`service_id`, meaning the same vehicle will complete all three trips.
+The data indicates that the first trip is running 30 seconds late, so +its arrival time will be 21:17:30. + +Because the second trip is scheduled to depart at 21:18:00, there is a +buffer time to catch up (in other words, the first trip is only 30 +seconds late, so hopefully it will not impact the second trip). + +If, for instance, the second trip ran 10 minutes late (and therefore +arrived at 21:38:00 instead of 21:28:00), you could reasonably assume +the third trip would begin at about 21:38:00 instead of 21:35:00 (about +three minutes late). + +### Querying Service Alerts + +When using `GtfsRealTimeToSql` to store service alerts, data is stored +in three tables. The main service alert information is stored in +`gtfs_rt_alerts`. Since there can be any number of time ranges or +affected entities for any given alert, there is a table to hold time +ranges (`gtfs_rt_alerts_timeranges`) and one to hold affected entities +(`gtfs_rt_alerts_entities`). + +In order to link this data together, each alert has an `alert_id` +value (created by `GtfsRealTimeToSql`) which is also present for any +corresponding time range and affected entities records. + +To retrieve service alerts from the database, you can use the following +query: + +```sql +SELECT alert_id, header, description, cause, effect FROM gtfs_rt_alerts; +``` + +At the time of writing, this yields the following results. The +description has been shortened as they are quite long and descriptive in +the original feed. + +| `alert_id` | `header` | `description` | `cause` | `effect` | +| :--------- | :--------------------------- | :--------------------------------------------------------- | :------- | :------- | +| 6 | Route 11 detour | Route 11 outbound detoured due to ... | 1 | 4 | +| 11 | Ruggles elevator unavailable | ... Commuter Rail platform to the lobby is unavailable ... 
| 9 | 7 | +| 33 | Extra Franklin Line service | | 1 | 5 | + +***Note:** A cause value of 1 corresponds to UNKNOWN_CAUSE, while 9 +corresponds to MAINTENANCE. An effect value of 4 corresponds to DETOUR, +7 corresponds to OTHER_EFFECT, while 5 corresponds to +ADDITIONAL_SERVICE.* + +To determine the timing of these alerts, query the +`gtfs_rt_alerts_timeranges` table for the given alerts. + +``` +$ export TZ=America/New_York + +$ sqlite3 db.sqlite + +sqlite> SELECT alert_id, datetime(start, 'unixepoch', 'localtime'), datetime(finish, 'unixepoch', 'localtime') + FROM gtfs_rt_alerts_timeranges + WHERE alert_id IN (6, 11, 33); +``` + +***Note:** In this example, the system timezone has been temporarily +changed to America/New_York (the timezone specified in MBTA's +agency.txt file) so the timestamps are formatted correctly. You may +prefer instead to format the start and finish timestamps using your +programming language of choice.* + +| `alert_id` | `start` (formatted) | `finish` (formatted) | +| :--------- | :------------------ | :------------------- | +| 6 | 2015-02-17 15:56:52 | | +| 11 | 2015-03-18 06:00:00 | 2015-03-18 16:00:00 | +| 11 | 2015-03-19 06:00:00 | 2015-03-19 16:00:00 | +| 33 | 2015-03-16 10:58:38 | 2015-03-18 02:30:00 | + +These dates indicate the following: + +* The alert with an ID of 6 began on February 17 and has no specified end date. +* The alert with an ID of 11 will occur over two days between 6 AM and 4 PM. +* The alert with an ID of 33 will last for almost two days. + +Finally, to determine which entities (routes, trips, stops) are affected +by these alerts, query the `gtfs_rt_alerts_entities` table. + +```sql +SELECT alert_id, agency_id, route_id, route_type, stop_id, trip_id, trip_start_date, trip_start_time, trip_rship + FROM gtfs_rt_alerts_entities WHERE alert_id IN (6, 11, 33); +``` + +This results in the following data, describing only an entity for the +final service alert.
+ +| `alert_id` | `agency_id` | `route_id` | `route_type` | `stop_id` | `trip_id` | `trip_start_date` | `trip_start_time` | `trip_rship` | +| :--------- | :---------- | :---------- | :----------- | :-------- | :-------- | :---------------- | :---------------- | :----------- | +| 33 | 1 | CR-Franklin | 2 | | | | | | + +Using the `route_id` value, you can find the route from the GTFS feed: + +```sql +SELECT agency_id, route_type, route_long_name, route_desc + FROM routes WHERE route_id = 'CR-Franklin'; +``` + +This query will result in the following data: + +| `agency_id` | `route_type` | `route_long_name` | `route_desc` | +| :---------- | :----------- | :---------------- | :------------ | +| 1 | 2 | Franklin Line | Commuter Rail | + +In effect, this means that if you have a web site or app displaying the +schedule for the Franklin line, this alert should be displayed so the +people who use the line are aware of the change. + +***Note:** Even though the `agency_id` and `route_type` values are included, +you do not really need these since the `route_id` can only refer to one +row in the GTFS feed. If instead you wanted to refer to ALL rail lines, +the alert would have the `route_id` value blank but keep the `route_type` +value.* + diff --git a/gtfs-realtime-book/ch-10-extensions.md b/gtfs-realtime-book/ch-10-extensions.md new file mode 100644 index 0000000..993814c --- /dev/null +++ b/gtfs-realtime-book/ch-10-extensions.md @@ -0,0 +1,374 @@ +## 10. GTFS-realtime Extensions + +*Introduction to gtfs-realtime.proto* showed you an extract +from the `gtfs-realtime.proto` file that is used as an input to the +Protocol Buffers `protoc` command. + +One of the lines in this extract included the following `extensions` +directive: + +``` +extensions 1000 to 1999; +``` + +Each element in a Protocol Buffers entity must be assigned a value.
This +is the value used to represent the element in the binary protocol buffer +stream (for instance, the `trip` element was assigned a value of +`1`). The `extensions` directive reserves the values between 1000 +and 1999 for external use. + +Having these values reserved means that transit agencies are free to +include additional information in their own GTFS-realtime feeds. In this +instance, the agency provides its own `proto` file to use with +`protoc` that builds on the original `gtfs-realtime.proto` file. + +At the time of writing, the following extensions are defined: + +| Extension ID | Developer | +| :----------- | :-------- | +| 1000 | OneBusAway | +| 1001 | New York City MTA | +| 1002 | Google | +| 1003 | OVapi | +| 1004 | Metra | + +You can find this list at +). As more +agencies release GTFS-realtime feeds this list will grow, since many +agencies have specific requirements in the way their internal systems +work, as well as to facilitate providing data in a way that is +understood by users of the given transport system. + +To demonstrate how extensions are specified and used, the following case +study will show you the extension for the New York City subway system. + +### Case Study: New York City Subway + +One of the agencies that provides an extension is New York City MTA. +They add a number of custom fields to their subway GTFS-realtime feeds. + +The following is a slimmed-down version of their Protocol Buffers +definition file, available from +. 
+ +``` +option java_package = "com.google.transit.realtime"; + +import "gtfs-realtime.proto"; + +message TripReplacementPeriod { + optional string route_id = 1; + optional transit_realtime.TimeRange replacement_period = 2; +} + +message NyctFeedHeader { + required string nyct_subway_version = 1; + repeated TripReplacementPeriod trip_replacement_period = 2; +} + +extend transit_realtime.FeedHeader { + optional NyctFeedHeader nyct_feed_header = 1001; +} + +message NyctTripDescriptor { + optional string train_id = 1; + optional bool is_assigned = 2; + + enum Direction { + NORTH = 1; + EAST = 2; + SOUTH = 3; + WEST = 4; + } + + optional Direction direction = 3; +} + +extend transit_realtime.TripDescriptor { + optional NyctTripDescriptor nyct_trip_descriptor = 1001; +} + +// NYCT Subway extensions for the stop time update + +message NyctStopTimeUpdate { + optional string scheduled_track = 1; + optional string actual_track = 2; +} + +extend transit_realtime.TripUpdate.StopTimeUpdate { + optional NyctStopTimeUpdate nyct_stop_time_update = 1001; +} +``` + +This file begins by importing the original `gtfs-realtime.proto` file +that was examined in *Introduction to gtfs-realtime.proto*. + +It then defines a number of new element types (they choose to prefix +them using `Nyct`, although the only important thing here is that they +don't conflict with the names from `gtfs-realtime.proto`). + +This definition file then extends the `FeedHeader`, `TripDescriptor` and +`StopTimeUpdate` element types. Values +`1000` to `1999` are reserved for custom extensions. This is the +reason why the added `nyct_feed_header`, `nyct_trip_descriptor` and +`nyct_stop_time_update` fields use numbers in this range. + +### Compiling an Extended Protocol Buffer + +In order to make use of these extended fields, you first need to +download the `nyct-subway.proto` file into the same directory as +`gtfs-realtime.proto` and compile it (*Compiling gtfs-realtime.proto*).
+ +``` +$ cd /path/to/protobuf + +$ curl \ + http://datamine.mta.info/sites/all/files/pdfs/nyct-subway.proto.txt \ + -o nyct-subway.proto +``` + +You can now build a Java class similar to before, but using the +`nyct-subway.proto` file as the input instead of +`gtfs-realtime.proto`: + +``` +$ protoc \ + --proto_path=/path/to/protobuf \ + --java_out=/path/to/gtfsrt/src \ + /path/to/protobuf/nyct-subway.proto +``` + +If this command executes successfully, you will now have a file called +`NyctSubway.java` in the **./src/com/google/transit/realtime** +directory (in addition to `GtfsRealtime.java`, which is still +required). + +The next section will show you how to use this class. + +### Registering Extensions + +The process to load an extended GTFS-realtime feed is the same as for a regular +feed (in that you call **parseFrom()** to build the `FeedMessage` +object), but first you must register the extensions. + +In Java, this is achieved using the `ExtensionRegistry` class and the +**registerAllExtensions()** helper method. + +``` +import com.google.protobuf.ExtensionRegistry; + +... + +ExtensionRegistry registry = ExtensionRegistry.newInstance(); + +NyctSubway.registerAllExtensions(registry); +``` + +The `ExtensionRegistry` object is then passed to **parseFrom()** as +the second argument. The following code shows how to open and read the +main New York City subway feed. + +***Note:** A developer API key is required to access the MTA +GTFS-realtime feeds.
You can register for a key at +[http://datamine.mta.info](http://datamine.mta.info/), then substitute +it into the `key` variable.* + +```java +public class YourClass { + public void loadFeed() throws IOException { + + String key = "*YOUR_KEY*"; + + URL url = new URL("http://datamine.mta.info/mta_esi.php?feed_id=1&key=" + key); + + InputStream is = url.openStream(); + + ExtensionRegistry registry = ExtensionRegistry.newInstance(); + + NyctSubway.registerAllExtensions(registry); + + FeedMessage fm = FeedMessage.parseFrom(is, registry); + + is.close(); + + // ... + + } +} +``` + +Once the feed has been parsed, you will no longer need to pass the +extension registry around. + +### Accessing Extended Protocol Buffer Elements + +Earlier it was explained that the general paradigm for reading +GTFS-realtime data is to check the existence of a field using +`hasFieldName()`, and to retrieve the value using +`getFieldName()`. + +This also applies to retrieving extended elements, but instead of there +being built-in methods for each extended field type, use +`hasExtension(fieldName)` and `getExtension(fieldName)`. + +The argument passed to `hasExtension()` and `getExtension()` is a +unique identifier for that field. The identifier is a static property of +the `NyctSubway` class. For example, the `nyct_trip_descriptor` +extended field has a unique identifier of +`NyctSubway.nyctTripDescriptor`. + +Since this field is an extension to the standard `TripDescriptor` +field, you can check for its presence as follows: + +```java +TripUpdate tripUpdate = entity.getTripUpdate(); + +TripDescriptor td = tripUpdate.getTrip(); + +if (td.hasExtension(NyctSubway.nyctTripDescriptor)) { + // ... +} +``` + +The `NyctSubway.nyctTripDescriptor` identifier corresponds to a field +of the created class `NyctTripDescriptor`, meaning that you can +retrieve its value and assign it directly to the class type.
+ +```java +NyctTripDescriptor nyctTd = td.getExtension(NyctSubway.nyctTripDescriptor); +``` +You can now access the values directly from this instance of +`NyctTripDescriptor`. For example, one of the added fields is +direction, which indicates whether the general direction of the train is +North, South, East or West. The following code shows how to use this +value: + +```java +if (nyctTd.hasDirection()) { + Direction direction = nyctTd.getDirection(); + + switch (direction.getNumber()) { + case Direction.NORTH_VALUE: // Northbound train + break; + + case Direction.SOUTH_VALUE: // Southbound train + break; + + case Direction.EAST_VALUE: // Eastbound train + break; + + case Direction.WEST_VALUE: // Westbound train + break; + + } +} +``` + +Similarly, you can access other extended fields, either from the +`NyctTripDescriptor` object, or from an instance of the +`NyctStopTimeUpdate` extension. + +### GTFS-realtime Extension Complete Example + +Piecing together the snippets from this case study, the following code +shows in context how you can access the extra fields such as the +train's direction and identifier: + +```java +public class NyctProcessor { + + // Loads and processes the feed + public void process(String apiKey) throws IOException { + + URL url = new URL("http://datamine.mta.info/mta_esi.php?feed_id=1&key=" + apiKey); + + InputStream is = url.openStream(); + + // Register the NYC-specific extensions + + ExtensionRegistry registry = ExtensionRegistry.newInstance(); + + NyctSubway.registerAllExtensions(registry); + + FeedMessage fm = FeedMessage.parseFrom(is, registry); + + // Loop over all entities + + for (FeedEntity entity : fm.getEntityList()) { + // In this example only trip updates are processed + + if (entity.hasTripUpdate()) { + processTripUpdate(entity.getTripUpdate()); + } + } + } + + // Used to process a single trip update + public void processTripUpdate(TripUpdate tripUpdate) { + + if (tripUpdate.hasTrip()) { + + TripDescriptor td = 
tripUpdate.getTrip(); + + // Check if the extended trip descriptor is available + + if (td.hasExtension(NyctSubway.nyctTripDescriptor)) { + NyctTripDescriptor nyctTd = td.getExtension(NyctSubway.nyctTripDescriptor); + + processNyctTripDescriptor(nyctTd); + } + } + } + + // Process a single extended trip descriptor + public void processNyctTripDescriptor(NyctTripDescriptor nyctTd) { + + // If the train ID is specified, output it + if (nyctTd.hasTrainId()) { + String trainId = nyctTd.getTrainId(); + + System.out.println("Train ID: " + trainId); + } + + // If the direction is specified, output it + + if (nyctTd.hasDirection()) { + Direction direction = nyctTd.getDirection(); + + String directionLabel = null; + + switch (direction.getNumber()) { + case Direction.NORTH_VALUE: + directionLabel = "North"; + break; + + case Direction.SOUTH_VALUE: + directionLabel = "South"; + break; + + case Direction.EAST_VALUE: + directionLabel = "East"; + break; + + case Direction.WEST_VALUE: + directionLabel = "West"; + break; + + default: + directionLabel = "Unknown Value"; + } + + System.out.println("Direction: " + directionLabel); + } + } +} +``` + +After you invoke the `process()` method, your output should be similar +to the following: + +``` +Direction: North + +Train ID: 06 0139+ PEL/BBR +``` diff --git a/gtfs-realtime-book/ch-11-publishing-feeds.md b/gtfs-realtime-book/ch-11-publishing-feeds.md new file mode 100644 index 0000000..2569f57 --- /dev/null +++ b/gtfs-realtime-book/ch-11-publishing-feeds.md @@ -0,0 +1,348 @@ +## 11. Publishing GTFS-realtime Feeds + +So far this book has been focused on how to consume GTFS-realtime feeds; +in this chapter you will be shown how to create and publish your own +GTFS-realtime feeds. + +While this chapter is primarily intended for transit agencies (or +third-party companies providing services to public transit companies), +this information can be useful in other situations also. 
+ +Even if you do not represent a transit agency or have access to the GPS +units of an entire bus fleet, there may still be situations where you +want to produce a GTFS-realtime feed. For example, if you have a trip planning +server that can only handle GTFS and GTFS-realtime data, you might build +your own GTFS-realtime feeds in the following situations: + +* A transit company offers service alerts only via Twitter or an RSS + feed. +* You can access vehicle positions or estimated arrivals from a feed + in a format such as SIRI, NextBus or BusTime. +* You have interpolated your own vehicle positions based on + GTFS-realtime trip updates. +* You have interpolated your own trip updates based on vehicle + positions. + +### Building Protocol Buffer Elements + +When you generate source files using the `protoc` command, there is a +*builder* class created for each element type. To create an element to +include in a protocol buffer, you use its builder to construct the +element. + +For example, a service alert entity uses the `Alert` class. To +construct your own service alert, you would use the `Alert.Builder` +class. The `Alert` class contains a static method called +`newBuilder()` to create an instance of `Alert.Builder`. + +```java +Alert.Builder alert = Alert.newBuilder(); +``` + +You can now set the various elements that describe a service alert. + +```java +alert.setCause(Cause.ACCIDENT); + +alert.setEffect(Effect.DETOUR); +``` + +Most elements will be more complex than this; you will need to build +them in a similar manner before adding them to the alert. For example, +the header text for a service alert uses the `TranslatedString` +element type, which contains one or more translations of a single +string.
+ +```java +Translation.Builder translation = Translation.newBuilder(); + +translation.setText("Car accident"); + +TranslatedString.Builder translatedString = TranslatedString.newBuilder(); + +translatedString.addTranslation(translation); + +alert.setHeaderText(translatedString); +``` + +In actual fact, you can chain together these calls, since the builder +methods return the builder. The first two lines of the above code can be +shortened as follows: + +```java +Translation.Builder translation = Translation.newBuilder().setText("Car accident"); +``` + +For repeating elements (such as the `informed_entity` field), use the +`addElementName()` method. In the case of `informed_entity`, +this would be `addInformedEntity()`. The following code adds an +informed entity to the alert for a route with an ID of 102: + +```java +EntitySelector.Builder entity = EntitySelector.newBuilder().setRouteId("102"); + +alert.addInformedEntity(entity); +``` + +### Creating a Complete Protocol Buffer + +The previous section showed the basics of creating a service alert +message, but a protocol buffer feed has more to it than just a single +entity. It can have multiple entities, and you must also include the +GTFS-realtime header. The header can be created as follows: + +```java +FeedHeader.Builder header = FeedHeader.newBuilder(); + +header.setGtfsRealtimeVersion("1.0"); +``` + +A single service alert (or a trip update, or a vehicle position) is +contained within a `FeedEntity` object. Each `FeedEntity` in a feed +must have a unique ID. The following code creates the `FeedEntity` +using the `alert` object created in the previous section. 
+ +```java +FeedEntity.Builder entity = FeedEntity.newBuilder(); + +entity.setId("SOME UNIQUE ID"); + +entity.setAlert(alert); +``` + +Once you have the header and an entity you can create the feed as +follows: + +```java +FeedMessage.Builder message = FeedMessage.newBuilder(); + +message.setHeader(header); + +message.addEntity(entity); +``` + +***Note**: A feed with no entities is also valid; in the middle of the night +there may be no vehicle positions or trip updates, and there may +frequently be no service alerts.* + +Once you have created this object, you can turn it into a +`FeedMessage` by calling `build()`. + +``` +FeedMessage feed = message.build(); +``` + +This will give you a `FeedMessage` object just like when you parse a +third-party feed using `FeedMessage.parseFrom()`. + +### Full Source Code + +Piecing together all of the code covered so far in this chapter, you +could create a service alert feed (using a fictional detour) using the +following code. + +This example makes use of a helper method to build translated strings, +since it needs to be done a number of times. If you want to create the +alert in multiple languages, you would need to change this method +accordingly. 
+ +```java +public class SampleServicesAlertsFeedCreator { + // Helper method to simplify creation of translated strings + + private TranslatedString translatedString(String str) { + Translation.Builder translation = Translation.newBuilder().setText(str); + + return TranslatedString.newBuilder().addTranslation(translation).build(); + } + + public FeedMessage create() { + Alert.Builder alert = Alert.newBuilder(); + + alert.setCause(Cause.ACCIDENT); + alert.setEffect(Effect.DETOUR); + alert.setUrl(translatedString("http://www.example.com")); + alert.setHeaderText(translatedString("Car accident on 14th Street")); + + alert.setDescriptionText(translatedString( + "Please be aware that 14th Street is closed due to a car accident" + )); + + // Loop over several route IDs to mark them as impacted + + String impactedRouteIds[] = { "102", "103" }; + + for (int i = 0; i < impactedRouteIds.length; i++) { + EntitySelector.Builder entity = EntitySelector.newBuilder(); + + entity.setRouteId(impactedRouteIds[i]); + + alert.addInformedEntity(entity); + } + + // Create the alert container entity + + FeedEntity.Builder entity = FeedEntity.newBuilder(); + + entity.setId("1"); + entity.setAlert(alert); + + // Build the feed header + + FeedHeader.Builder header = FeedHeader.newBuilder(); + + header.setGtfsRealtimeVersion("1.0"); + + // Build the feed using the header and entity + + FeedMessage.Builder message = FeedMessage.newBuilder(); + + message.setHeader(header); + message.addEntity(entity); + + // Return the built FeedMessage + return message.build(); + } +} +``` + +### Modifying an Existing Protocol Buffer + +In some circumstances you might want to modify an existing protocol +buffer. For example, consider a case where you have access to a service +alerts feed, but also want to add additional service alerts that you +parsed from Twitter. 
The following diagram demonstrates this: + +![Modified Feed](images/Modified-Feed.png) + +In this case, you can turn a `FeedMessage` object into a +`FeedMessage.Builder` object by calling the `toBuilder()` method. +You can then add additional alerts as required and create a new feed. + +```java +// Parse some third-party feed + +URL url = new URL("http://example.com/alerts.pb"); + +InputStream is = url.openStream(); + +FeedMessage message = GtfsRealtime.FeedMessage.parseFrom(is); + +// Convert existing feed into a builder + +FeedMessage.Builder builder = message.toBuilder(); + +Alert.Builder alert = Alert.newBuilder(); + +// Add the details of the alert here + +// Create the alert entity + +FeedEntity.Builder entity = FeedEntity.newBuilder(); + +entity.setId("SOME ID"); +entity.setAlert(alert); + +// Add the new entity to the builder +builder.addEntity(entity); + +// Build the updated FeedMessage +message = builder.build(); +``` + +### Saving a Protocol Buffer File + +Once you have created a protocol buffer, the next step is to output it +so other systems that read GTFS-realtime feeds can consume it (such as +for others who publish real-time data in their apps or web sites). + +Typically, you would generate a new version of the feed every `X` +seconds, then save (or upload) it each time to your web server (see +*Frequency of Updates* later in this chapter for discussion). + +The raw protocol buffer bytes can be output using the `writeTo()` +method on the `FeedMessage` object. This method accepts an +`OutputStream` object as its only argument. + +For example, to output the service alerts feed created in this chapter +to a file, you can use the `FileOutputStream` class. + +***Note:** While there are no specific rules for naming a protocol buffer, +often the `.pb` extension is used.*
+ +```java +SampleServicesAlertsFeedCreator creator = new SampleServicesAlertsFeedCreator(); + +FeedMessage message = creator.create(); + +File file = new File("/path/to/output/alerts.pb"); + +OutputStream outputStream = new FileOutputStream(file); + +message.writeTo(outputStream); +``` + +### Serving a Protocol Buffer File + +The recommended content type header value to use when serving a Protocol +Buffer file is **application/octet-stream**. In Apache HTTP Server, you +can set the following configuration parameter to serve `.pb` files +with this content type: + +``` +AddType application/octet-stream .pb +``` + +If you are using nginx for your web server, you can add the following +entry to the nginx `mime.types` file: + +``` +types { + ... + + application/octet-stream pb; +} +``` + +### Frequency of Updates + +When publishing your own feed, the frequency with which you update the +feed on your web server depends on how frequently the source data is +updated. + +It is important to take into account the capabilities of your servers +when providing a GTFS-realtime feed, as the more frequently the data is +updated, the more resources are required. Each of the GTFS-realtime +message types has slightly different needs: + +* **Vehicle Positions.** These will need updating very frequently, as + presumably the vehicles on your transit network are always moving. A + vehicle position feed could update as frequently as every 10-15 + seconds. +* **Trip Updates.** These will need updating very frequently, although + perhaps not as frequently as vehicle positions. Estimates would + constantly be refined by new vehicle positions, but a single + movement (or lack of movement) for a vehicle is not likely to make a + huge difference to estimates. A trip updates feed could update every + 10-30 seconds. +* **Service Alerts.** These will typically change far less frequently + than vehicle positions or trip updates.
A system that triggered an + update to the service alerts feed only when a new alert was entered + into the system would be far more efficient than automatically doing + it every `X` seconds. + +To summarize: + +- **Vehicle Positions.** Update every 10-15 seconds. +- **Trip Updates.** Update every 10-30 seconds. +- **Service Alerts.** Triggered on demand when new data is available. + +If your transit agency does not run all night, an additional efficiency +would be to not update the feed at all when the network has shut down +for the night. + +In this case, once the last trip has finished, an empty protocol buffer +would be uploaded (that is, a valid buffer but with no entities), and +the next version would not be uploaded until the next morning when the +first trip starts. + diff --git a/gtfs-realtime-book/ch-12-conclusion.md b/gtfs-realtime-book/ch-12-conclusion.md new file mode 100644 index 0000000..a6092af --- /dev/null +++ b/gtfs-realtime-book/ch-12-conclusion.md @@ -0,0 +1,29 @@ +## Conclusion + +Thanks for reading *The Definitive Guide to GTFS-realtime*. I wrote this +book with the intention of providing a comprehensive guide to getting +started with consuming real-time data using the GTFS-realtime +specification. + +One of the biggest advantages GTFS-realtime has over other real-time +specifications or services (such as NextBus or SIRI) is that it is designed +to complement GTFS feeds by sharing a common set of identifiers. + +On one hand, GTFS-realtime is very straightforward, as it provides only +three different types of messages (service alerts, vehicle positions and +trip updates), but there are a number of complexities involved in +getting started with both consuming and producing GTFS-realtime feeds, +such as setting up Protocol Buffers to read feeds, or understanding the +intent of trip updates. + +The key takeaways from this book are: + +* The different types of real-time data available in GTFS-realtime feeds.
+* How to read data from a GTFS-realtime feed. +* How to apply the data you read from GTFS-realtime feeds to your own applications. +* The main concepts behind producing and publishing your own GTFS-realtime feeds. + +If you have enjoyed this book, please share it or feel free to contribute improvements or changes as necessary. + +*Quentin Zervaas* +August, 2015 diff --git a/gtfs-realtime-book/images/GTFS-realtime-consumption.png b/gtfs-realtime-book/images/GTFS-realtime-consumption.png new file mode 100644 index 0000000..83fee1a Binary files /dev/null and b/gtfs-realtime-book/images/GTFS-realtime-consumption.png differ diff --git a/gtfs-realtime-book/images/GTFS-realtime-direct-or-intermediate.png b/gtfs-realtime-book/images/GTFS-realtime-direct-or-intermediate.png new file mode 100644 index 0000000..8375211 Binary files /dev/null and b/gtfs-realtime-book/images/GTFS-realtime-direct-or-intermediate.png differ diff --git a/gtfs-realtime-book/images/GTFS-realtime-structure.png b/gtfs-realtime-book/images/GTFS-realtime-structure.png new file mode 100644 index 0000000..c1308c8 Binary files /dev/null and b/gtfs-realtime-book/images/GTFS-realtime-structure.png differ diff --git a/gtfs-realtime-book/images/Modified-Feed.png b/gtfs-realtime-book/images/Modified-Feed.png new file mode 100644 index 0000000..11ce531 Binary files /dev/null and b/gtfs-realtime-book/images/Modified-Feed.png differ diff --git a/gtfs-realtime-book/images/bearing-from-position.png b/gtfs-realtime-book/images/bearing-from-position.png new file mode 100644 index 0000000..0712a9c Binary files /dev/null and b/gtfs-realtime-book/images/bearing-from-position.png differ diff --git a/gtfs-realtime-book/images/boston-trip.png b/gtfs-realtime-book/images/boston-trip.png new file mode 100644 index 0000000..60ff67b Binary files /dev/null and b/gtfs-realtime-book/images/boston-trip.png differ diff --git a/gtfs-realtime-book/images/mbta_bus.png b/gtfs-realtime-book/images/mbta_bus.png new file mode 100644
index 0000000..a97a94d Binary files /dev/null and b/gtfs-realtime-book/images/mbta_bus.png differ diff --git a/gtfs-realtime-book/images/remaining_stops.png b/gtfs-realtime-book/images/remaining_stops.png new file mode 100644 index 0000000..277f401 Binary files /dev/null and b/gtfs-realtime-book/images/remaining_stops.png differ diff --git a/gtfs-realtime-book/images/stoptime_map.png b/gtfs-realtime-book/images/stoptime_map.png new file mode 100644 index 0000000..54a0c33 Binary files /dev/null and b/gtfs-realtime-book/images/stoptime_map.png differ diff --git a/gtfs-realtime-book/images/vehicle-position.png b/gtfs-realtime-book/images/vehicle-position.png new file mode 100644 index 0000000..5920ba7 Binary files /dev/null and b/gtfs-realtime-book/images/vehicle-position.png differ -- cgit v1.2.3