Taxi Trips Study - Advanced Pipeline
This notebook analyzes taxi trips with multiple enrichments: pickup counts, dropoff counts, and the average fare amount per street segment.
Data Sources
- Yellow NYC Taxis 2015: sample taxi trip data for NYC, loaded from the Hugging Face dataset `oscur/taxisvis1M`.
In [ ]:
Copied!
import urban_mapper as um

# Load the taxi sample from the Hugging Face dataset, using the pickup
# coordinates as the loader's longitude/latitude columns.
data = (
    um.UrbanMapper()
    .loader
    .from_huggingface("oscur/taxisvis1M")
    .with_columns(longitude_column="pickup_longitude", latitude_column="pickup_latitude")
    .load()
)

# Cast every coordinate column (pickup and dropoff) to float.
for column in (
    "pickup_longitude",
    "pickup_latitude",
    "dropoff_longitude",
    "dropoff_latitude",
):
    data[column] = data[column].astype(float)

# Written to disk so the pipeline's loader step can read it back via from_file().
data.to_csv("./taxisvis1M.csv")
In [ ]:
Copied!
import urban_mapper as um
from urban_mapper.pipeline import UrbanPipeline

# Define the pipeline: an ordered list of (step name, built component) pairs.
pipeline = UrbanPipeline([
    # Street network of Downtown Brooklyn (drivable roads). Two mappings are
    # declared so that both the pickup and the dropoff coordinates get their
    # own output column ("pickup_segment" / "dropoff_segment").
    ("urban_layer", (
        um.UrbanMapper().urban_layer
        .with_type("streets_roads")
        .from_place("Downtown Brooklyn, New York City, USA", network_type="drive")
        .with_mapping(
            longitude_column="pickup_longitude",
            latitude_column="pickup_latitude",
            output_column="pickup_segment"
        )
        .with_mapping(
            longitude_column="dropoff_longitude",
            latitude_column="dropoff_latitude",
            output_column="dropoff_segment"
        )
        .build()
    )),
    # Reads the CSV exported by the data-preparation cell.
    ("loader", (
        um.UrbanMapper().loader
        .from_file("./taxisvis1M.csv")
        .with_columns(longitude_column="pickup_longitude", latitude_column="pickup_latitude")
        .build()
    )),
    # One imputer per coordinate pair.
    # NOTE(review): presumably SimpleGeoImputer handles rows with missing
    # coordinates -- confirm against the UrbanMapper imputer documentation.
    ("impute_pickup", (
        um.UrbanMapper().imputer
        .with_type("SimpleGeoImputer")
        .on_columns("pickup_longitude", "pickup_latitude")
        .build()
    )),
    ("impute_dropoff", (
        um.UrbanMapper().imputer
        .with_type("SimpleGeoImputer")
        .on_columns("dropoff_longitude", "dropoff_latitude")
        .build()
    )),
    # NOTE(review): presumably restricts the records to the urban layer's
    # bounding box -- confirm against the UrbanMapper filter documentation.
    ("filter", um.UrbanMapper().filter.with_type("BoundingBoxFilter").build()),
    # Enrichment 1: count of records grouped by pickup segment.
    ("enrich_pickups", (
        um.UrbanMapper().enricher
        .with_data(group_by="pickup_segment")
        .count_by(output_column="pickup_count")
        .build()
    )),
    # Enrichment 2: count of records grouped by dropoff segment.
    ("enrich_dropoffs", (
        um.UrbanMapper().enricher
        .with_data(group_by="dropoff_segment")
        .count_by(output_column="dropoff_count")
        .build()
    )),
    # Enrichment 3: mean of "fare_amount" grouped by pickup segment.
    ("enrich_fare_amount", (
        um.UrbanMapper().enricher
        .with_data(group_by="pickup_segment", values_from="fare_amount")
        .aggregate_by(method="mean", output_column="avg_fare_amount")
        .build()
    )),
    # Interactive map on dark tiles with a white colorbar label.
    ("visualiser", (
        um.UrbanMapper().visual
        .with_type("Interactive")
        .with_style({"tiles": "CartoDB dark_matter", "colorbar_text_color": "white"})
        .build()
    )),
])
In [ ]:
Copied!
# Execute the pipeline once; the result is unpacked into the mapped data and
# the enriched urban layer.
mapped_data, enriched_layer = pipeline.compose_transform()
In [ ]:
Copied!
# Visualize the three enrichment columns produced by the pipeline.
fig = pipeline.visualise(["pickup_count", "dropoff_count", "avg_fare_amount"])
# Bare expression on the last line so the notebook displays the figure.
fig
In [ ]:
Copied!
# Save the pipeline to disk as a dill file.
pipeline.save("./taxi_advanced_pipeline.dill")
In [ ]:
Copied!