Taxi Trips Study - Step-by-Step¶
This notebook analyzes taxi trip data, mapping pickups and dropoffs to street segments and visualizing counts.
Data Sources¶
- Yellow NYC Taxis 2015: A sample of 2015 NYC yellow-taxi trip records, loaded in Step 2 from the Hugging Face dataset `oscur/taxisvis1M`.
In [1]:
Copied!
import urban_mapper as um
# Create the UrbanMapper instance that the later cells access as `mapper`.
mapper = um.UrbanMapper()

# Step 1: Build the street-segment urban layer for the study area.
# The builder is configured one call at a time: layer type first, then the
# place query (with the "drive" network type), then the final build.
layer_builder = mapper.urban_layer.with_type("streets_roads")
layer_builder = layer_builder.from_place(
    "Downtown Brooklyn, New York City, USA", network_type="drive"
)
layer = layer_builder.build()
import urban_mapper as um
# Create the UrbanMapper instance that the later cells access as `mapper`.
mapper = um.UrbanMapper()

# Step 1: Build the street-segment urban layer for the study area.
# The builder is configured one call at a time: layer type first, then the
# place query (with the "drive" network type), then the final build.
layer_builder = mapper.urban_layer.with_type("streets_roads")
layer_builder = layer_builder.from_place(
    "Downtown Brooklyn, New York City, USA", network_type="drive"
)
layer = layer_builder.build()
--------------------------------------------------------------------------- ModuleNotFoundError Traceback (most recent call last) Cell In[1], line 1 ----> 1 import urban_mapper as um 3 # Initialise UrbanMapper 4 mapper = um.UrbanMapper() File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/__init__.py:3 1 from loguru import logger ----> 3 from .mixins import ( 4 LoaderMixin, 5 EnricherMixin, 6 VisualMixin, 7 TableVisMixin, 8 AuctusSearchMixin, 9 PipelineGeneratorMixin, 10 UrbanPipelineMixin, 11 ) 12 from .modules import ( 13 LoaderBase, 14 CSVLoader, (...) 30 PipelineGeneratorFactory, 31 ) 33 from .urban_mapper import UrbanMapper File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/mixins/__init__.py:1 ----> 1 from .loader import LoaderMixin 2 from .enricher import EnricherMixin 3 from .visual import VisualMixin File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/mixins/loader.py:1 ----> 1 from urban_mapper.modules.loader.loader_factory import LoaderFactory 4 class LoaderMixin(LoaderFactory): 5 def __init__(self): File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/modules/__init__.py:1 ----> 1 from .loader import LoaderBase, CSVLoader, ShapefileLoader, ParquetLoader 2 from .imputer import ( 3 GeoImputerBase, 4 SimpleGeoImputer, 5 AddressGeoImputer, 6 ) 7 from .filter import ( 8 GeoFilterBase, 9 BoundingBoxFilter, 10 ) File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/modules/loader/__init__.py:3 1 from .abc_loader import LoaderBase 2 from .loaders import CSVLoader, ShapefileLoader, ParquetLoader ----> 3 from .loader_factory import LoaderFactory 5 __all__ = [ 6 "LoaderBase", 7 "CSVLoader", (...) 
10 "LoaderFactory", 11 ] File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/modules/loader/loader_factory.py:19 17 from urban_mapper.modules.loader.loaders.csv_loader import CSVLoader 18 from urban_mapper.modules.loader.loaders.parquet_loader import ParquetLoader ---> 19 from urban_mapper.modules.loader.loaders.raster_loader import RasterLoader # Importing RasterLoader of the new raster loader module 20 from urban_mapper.modules.loader.loaders.shapefile_loader import ShapefileLoader 21 from urban_mapper.utils import require_attributes File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/modules/loader/loaders/raster_loader.py:2 1 from ..abc_loader import LoaderBase ----> 2 import rasterio 3 from typing import Any 4 import numpy as np ModuleNotFoundError: No module named 'rasterio'
In [2]:
Copied!
# Step 2: Load the taxi trip data.
# Note: for the interactive documentation build only 5000 records are
# requested (`number_of_rows=5000`). Feel free to remove that limit for a
# more realistic analysis.
trip_loader = mapper.loader.from_huggingface(
    "oscur/taxisvis1M", number_of_rows=5000, streaming=True
)
trip_loader = trip_loader.with_columns(
    longitude_column="pickup_longitude", latitude_column="pickup_latitude"
)
data = trip_loader.load()

# Cast each coordinate column to float, pickups first and then dropoffs.
for coordinate_column in (
    "pickup_longitude",
    "pickup_latitude",
    "dropoff_longitude",
    "dropoff_latitude",
):
    data[coordinate_column] = data[coordinate_column].astype(float)
# Step 2: Load the taxi trip data.
# Note: for the interactive documentation build only 5000 records are
# requested (`number_of_rows=5000`). Feel free to remove that limit for a
# more realistic analysis.
trip_loader = mapper.loader.from_huggingface(
    "oscur/taxisvis1M", number_of_rows=5000, streaming=True
)
trip_loader = trip_loader.with_columns(
    longitude_column="pickup_longitude", latitude_column="pickup_latitude"
)
data = trip_loader.load()

# Cast each coordinate column to float, pickups first and then dropoffs.
for coordinate_column in (
    "pickup_longitude",
    "pickup_latitude",
    "dropoff_longitude",
    "dropoff_latitude",
):
    data[coordinate_column] = data[coordinate_column].astype(float)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[2], line 4 1 # Step 2: Load taxi trip data 2 # Note: For the documentation interactive mode, we only query 5000 records from the dataset. Feel free to remove for a more realistic analysis. 3 data = ( ----> 4 mapper.loader 5 .from_huggingface("oscur/taxisvis1M", number_of_rows=5000, streaming=True) 6 .with_columns(longitude_column="pickup_longitude", latitude_column="pickup_latitude") 7 .load() 8 ) 10 data['pickup_longitude'] = data['pickup_longitude'].astype(float) 11 data['pickup_latitude'] = data['pickup_latitude'].astype(float) NameError: name 'mapper' is not defined
In [3]:
Copied!
# Step 3: Impute missing coordinates.
# Two "SimpleGeoImputer" passes are run against the street layer, and each
# pass rebinds `data` to the result of its transform.

# Pass 1: the pickup longitude/latitude pair.
pickup_imputer_builder = mapper.imputer.with_type("SimpleGeoImputer")
pickup_imputer_builder = pickup_imputer_builder.on_columns(
    "pickup_longitude", "pickup_latitude"
)
imputer_pickup = pickup_imputer_builder.build()
data = imputer_pickup.transform(data, layer)

# Pass 2: the dropoff longitude/latitude pair, applied to the output of pass 1.
dropoff_imputer_builder = mapper.imputer.with_type("SimpleGeoImputer")
dropoff_imputer_builder = dropoff_imputer_builder.on_columns(
    "dropoff_longitude", "dropoff_latitude"
)
imputer_dropoff = dropoff_imputer_builder.build()
data = imputer_dropoff.transform(data, layer)
# Step 3: Impute missing coordinates.
# Two "SimpleGeoImputer" passes are run against the street layer, and each
# pass rebinds `data` to the result of its transform.

# Pass 1: the pickup longitude/latitude pair.
pickup_imputer_builder = mapper.imputer.with_type("SimpleGeoImputer")
pickup_imputer_builder = pickup_imputer_builder.on_columns(
    "pickup_longitude", "pickup_latitude"
)
imputer_pickup = pickup_imputer_builder.build()
data = imputer_pickup.transform(data, layer)

# Pass 2: the dropoff longitude/latitude pair, applied to the output of pass 1.
dropoff_imputer_builder = mapper.imputer.with_type("SimpleGeoImputer")
dropoff_imputer_builder = dropoff_imputer_builder.on_columns(
    "dropoff_longitude", "dropoff_latitude"
)
imputer_dropoff = dropoff_imputer_builder.build()
data = imputer_dropoff.transform(data, layer)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[3], line 3 1 # Step 3: Impute missing coordinates 2 imputer_pickup = ( ----> 3 mapper.imputer 4 .with_type("SimpleGeoImputer") 5 .on_columns("pickup_longitude", "pickup_latitude") 6 .build() 7 ) 8 data = imputer_pickup.transform(data, layer) 10 imputer_dropoff = ( 11 mapper.imputer 12 .with_type("SimpleGeoImputer") 13 .on_columns("dropoff_longitude", "dropoff_latitude") 14 .build() 15 ) NameError: name 'mapper' is not defined
In [4]:
Copied!
# Step 4: Filter to bounding box.
# A "BoundingBoxFilter" is built and applied to `data` together with the
# street layer (presumably restricting rows to the layer's extent — confirm
# against the UrbanMapper filter documentation).
filter_builder = mapper.filter.with_type("BoundingBoxFilter")
filter_step = filter_builder.build()
data = filter_step.transform(data, layer)
# Step 4: Filter to bounding box.
# A "BoundingBoxFilter" is built and applied to `data` together with the
# street layer (presumably restricting rows to the layer's extent — confirm
# against the UrbanMapper filter documentation).
filter_builder = mapper.filter.with_type("BoundingBoxFilter")
filter_step = filter_builder.build()
data = filter_step.transform(data, layer)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[4], line 2 1 # Step 4: Filter to bounding box ----> 2 filter_step = mapper.filter.with_type("BoundingBoxFilter").build() 3 data = filter_step.transform(data, layer) NameError: name 'mapper' is not defined
In [5]:
Copied!
# Step 5: Map pickups and dropoffs to their nearest street segment.
import copy

# The dropoff mapping runs on a deep copy so that it uses a layer object
# separate from the one used for pickups.
# NOTE(review): presumably needed because map_nearest_layer changes layer state — confirm.
tmp_layer = copy.deepcopy(layer)

# Keyword arguments for each mapping call, kept side by side for comparison.
pickup_mapping_args = {
    "longitude_column": "pickup_longitude",
    "latitude_column": "pickup_latitude",
    "output_column": "pickup_segment",
}
dropoff_mapping_args = {
    "longitude_column": "dropoff_longitude",
    "latitude_column": "dropoff_latitude",
    "output_column": "dropoff_segment",
}

# Only the second element of each returned pair is kept.
_, mapped_pickups = layer.map_nearest_layer(data, **pickup_mapping_args)
_, mapped_dropoffs = tmp_layer.map_nearest_layer(data, **dropoff_mapping_args)
# Step 5: Map pickups and dropoffs to their nearest street segment.
import copy

# The dropoff mapping runs on a deep copy so that it uses a layer object
# separate from the one used for pickups.
# NOTE(review): presumably needed because map_nearest_layer changes layer state — confirm.
tmp_layer = copy.deepcopy(layer)

# Keyword arguments for each mapping call, kept side by side for comparison.
pickup_mapping_args = {
    "longitude_column": "pickup_longitude",
    "latitude_column": "pickup_latitude",
    "output_column": "pickup_segment",
}
dropoff_mapping_args = {
    "longitude_column": "dropoff_longitude",
    "latitude_column": "dropoff_latitude",
    "output_column": "dropoff_segment",
}

# Only the second element of each returned pair is kept.
_, mapped_pickups = layer.map_nearest_layer(data, **pickup_mapping_args)
_, mapped_dropoffs = tmp_layer.map_nearest_layer(data, **dropoff_mapping_args)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[5], line 3 1 # Step 5: Map pickups and dropoffs 2 import copy ----> 3 tmp_layer = copy.deepcopy(layer) 5 _, mapped_pickups = layer.map_nearest_layer( 6 data, 7 longitude_column="pickup_longitude", 8 latitude_column="pickup_latitude", 9 output_column="pickup_segment" 10 ) 12 _, mapped_dropoffs = tmp_layer.map_nearest_layer( 13 data, 14 longitude_column="dropoff_longitude", 15 latitude_column="dropoff_latitude", 16 output_column="dropoff_segment" 17 ) NameError: name 'layer' is not defined
In [6]:
Copied!
# Step 6: Enrich the layer with per-segment counts.
# Pickup counts are added to the street layer first; dropoff counts are then
# added on top of that already-enriched layer, so the final layer has both.

# Count rows grouped by "pickup_segment" into "pickup_count".
pickup_enricher_builder = mapper.enricher.with_data(group_by="pickup_segment")
pickup_enricher_builder = pickup_enricher_builder.count_by(output_column="pickup_count")
enricher_pickup = pickup_enricher_builder.build()
enriched_layer_pickup = enricher_pickup.enrich(mapped_pickups, layer)

# Count rows grouped by "dropoff_segment" into "dropoff_count".
dropoff_enricher_builder = mapper.enricher.with_data(group_by="dropoff_segment")
dropoff_enricher_builder = dropoff_enricher_builder.count_by(output_column="dropoff_count")
enricher_dropoff = dropoff_enricher_builder.build()
enriched_layer = enricher_dropoff.enrich(mapped_dropoffs, enriched_layer_pickup)
# Step 6: Enrich the layer with per-segment counts.
# Pickup counts are added to the street layer first; dropoff counts are then
# added on top of that already-enriched layer, so the final layer has both.

# Count rows grouped by "pickup_segment" into "pickup_count".
pickup_enricher_builder = mapper.enricher.with_data(group_by="pickup_segment")
pickup_enricher_builder = pickup_enricher_builder.count_by(output_column="pickup_count")
enricher_pickup = pickup_enricher_builder.build()
enriched_layer_pickup = enricher_pickup.enrich(mapped_pickups, layer)

# Count rows grouped by "dropoff_segment" into "dropoff_count".
dropoff_enricher_builder = mapper.enricher.with_data(group_by="dropoff_segment")
dropoff_enricher_builder = dropoff_enricher_builder.count_by(output_column="dropoff_count")
enricher_dropoff = dropoff_enricher_builder.build()
enriched_layer = enricher_dropoff.enrich(mapped_dropoffs, enriched_layer_pickup)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[6], line 3 1 # Step 6: Enrich with counts 2 enricher_pickup = ( ----> 3 mapper.enricher 4 .with_data(group_by="pickup_segment") 5 .count_by(output_column="pickup_count") 6 .build() 7 ) 8 enriched_layer_pickup = enricher_pickup.enrich(mapped_pickups, layer) 10 enricher_dropoff = ( 11 mapper.enricher 12 .with_data(group_by="dropoff_segment") 13 .count_by(output_column="dropoff_count") 14 .build() 15 ) NameError: name 'mapper' is not defined
In [7]:
Copied!
# Step 7: Visualize interactively.
# Style options handed to the "Interactive" visualiser.
map_style = {"tiles": "CartoDB dark_matter", "colorbar_text_color": "white"}
visualiser = mapper.visual.with_type("Interactive").with_style(map_style).build()

# Render the two count columns produced in Step 6.
count_columns = ["pickup_count", "dropoff_count"]
fig = visualiser.render(enriched_layer.get_layer(), columns=count_columns)

# Bare last expression so the notebook displays the figure.
fig
# Step 7: Visualize interactively.
# Style options handed to the "Interactive" visualiser.
map_style = {"tiles": "CartoDB dark_matter", "colorbar_text_color": "white"}
visualiser = mapper.visual.with_type("Interactive").with_style(map_style).build()

# Render the two count columns produced in Step 6.
count_columns = ["pickup_count", "dropoff_count"]
fig = visualiser.render(enriched_layer.get_layer(), columns=count_columns)

# Bare last expression so the notebook displays the figure.
fig
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[7], line 3 1 # Step 7: Visualize interactively 2 visualiser = ( ----> 3 mapper.visual 4 .with_type("Interactive") 5 .with_style({"tiles": "CartoDB dark_matter", "colorbar_text_color": "white"}) 6 .build() 7 ) 8 fig = visualiser.render(enriched_layer.get_layer(), columns=["pickup_count", "dropoff_count"]) 9 fig NameError: name 'mapper' is not defined