Taxi Trips Study - Pipeline
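This notebook uses UrbanPipeline to analyze NYC yellow-taxi trips: it maps each trip's pickup and dropoff coordinates onto the street segments of Downtown Brooklyn and counts pickups and dropoffs per segment.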
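Data Sources
- Yellow NYC Taxis 2015: Sample taxi trip data for NYC, loaded from the Hugging Face dataset `oscur/taxisvis1M` (only the first 5000 records are queried here).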
In [1]:
Copied!
import urban_mapper as um

# Load a sample of the taxi trips from Hugging Face, using the pickup
# coordinates as the longitude/latitude columns of the loader.
# Note: For the documentation interactive mode, we only query 5000 records from the dataset. Feel free to remove for a more realistic analysis.
data = (
    um.UrbanMapper()
    .loader
    .from_huggingface("oscur/taxisvis1M", number_of_rows=5000, streaming=True)
    .with_columns(longitude_column="pickup_longitude", latitude_column="pickup_latitude")
    .load()
)

# Cast all four coordinate columns to float before writing them out.
# NOTE(review): presumably the streamed columns do not arrive as floats,
# which is why the cast is needed; confirm against the dataset schema.
for coordinate_column in (
    "pickup_longitude",
    "pickup_latitude",
    "dropoff_longitude",
    "dropoff_latitude",
):
    data[coordinate_column] = data[coordinate_column].astype(float)

# Persist the sample as a CSV in the working directory.
# NOTE(review): if `data` is a pandas/GeoPandas frame, to_csv also writes the
# index as an extra unnamed column by default; pass index=False if that column
# is not wanted downstream.
data.to_csv("./taxisvis1M.csv")
--------------------------------------------------------------------------- ModuleNotFoundError Traceback (most recent call last) Cell In[1], line 1 ----> 1 import urban_mapper as um 3 # Note: For the documentation interactive mode, we only query 5000 records from the dataset. Feel free to remove for a more realistic analysis. 4 data = ( 5 um.UrbanMapper() 6 .loader (...) 9 .load() 10 ) File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/__init__.py:3 1 from loguru import logger ----> 3 from .mixins import ( 4 LoaderMixin, 5 EnricherMixin, 6 VisualMixin, 7 TableVisMixin, 8 AuctusSearchMixin, 9 PipelineGeneratorMixin, 10 UrbanPipelineMixin, 11 ) 12 from .modules import ( 13 LoaderBase, 14 CSVLoader, (...) 30 PipelineGeneratorFactory, 31 ) 33 from .urban_mapper import UrbanMapper File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/mixins/__init__.py:1 ----> 1 from .loader import LoaderMixin 2 from .enricher import EnricherMixin 3 from .visual import VisualMixin File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/mixins/loader.py:1 ----> 1 from urban_mapper.modules.loader.loader_factory import LoaderFactory 4 class LoaderMixin(LoaderFactory): 5 def __init__(self): File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/modules/__init__.py:1 ----> 1 from .loader import LoaderBase, CSVLoader, ShapefileLoader, ParquetLoader 2 from .imputer import ( 3 GeoImputerBase, 4 SimpleGeoImputer, 5 AddressGeoImputer, 6 ) 7 from .filter import ( 8 GeoFilterBase, 9 BoundingBoxFilter, 10 ) File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/modules/loader/__init__.py:3 1 from .abc_loader import LoaderBase 2 from .loaders import CSVLoader, ShapefileLoader, ParquetLoader ----> 3 from .loader_factory import LoaderFactory 5 __all__ = [ 6 "LoaderBase", 7 "CSVLoader", (...) 
10 "LoaderFactory", 11 ] File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/modules/loader/loader_factory.py:19 17 from urban_mapper.modules.loader.loaders.csv_loader import CSVLoader 18 from urban_mapper.modules.loader.loaders.parquet_loader import ParquetLoader ---> 19 from urban_mapper.modules.loader.loaders.raster_loader import RasterLoader # Importing RasterLoader of the new raster loader module 20 from urban_mapper.modules.loader.loaders.shapefile_loader import ShapefileLoader 21 from urban_mapper.utils import require_attributes File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/modules/loader/loaders/raster_loader.py:2 1 from ..abc_loader import LoaderBase ----> 2 import rasterio 3 from typing import Any 4 import numpy as np ModuleNotFoundError: No module named 'rasterio'
In [2]:
Copied!
import urban_mapper as um
from urban_mapper.pipeline import UrbanPipeline

# Define the pipeline as an ordered list of (step_name, built_component) pairs.
pipeline = UrbanPipeline([
    # Streets/roads layer for Downtown Brooklyn (drivable network). The two
    # mappings name the output columns that tie each record's pickup and
    # dropoff coordinates to the layer: `pickup_segment` and `dropoff_segment`.
    ("urban_layer", (
        um.UrbanMapper().urban_layer
        .with_type("streets_roads")
        .from_place("Downtown Brooklyn, New York City, USA", network_type="drive")
        .with_mapping(
            longitude_column="pickup_longitude",
            latitude_column="pickup_latitude",
            output_column="pickup_segment"
        )
        .with_mapping(
            longitude_column="dropoff_longitude",
            latitude_column="dropoff_latitude",
            output_column="dropoff_segment"
        )
        .build()
    )),
    # Reads ./taxisvis1M.csv, so that file must already exist in the
    # working directory when the pipeline runs.
    ("loader", (
        um.UrbanMapper().loader
        .from_file("./taxisvis1M.csv")
        .with_columns(longitude_column="pickup_longitude", latitude_column="pickup_latitude")
        .build()
    )),
    # One SimpleGeoImputer per coordinate pair.
    # NOTE(review): presumably this handles rows with missing coordinates;
    # confirm the exact behaviour in the UrbanMapper imputer docs.
    ("impute_pickup", (
        um.UrbanMapper().imputer
        .with_type("SimpleGeoImputer")
        .on_columns("pickup_longitude", "pickup_latitude")
        .build()
    )),
    ("impute_dropoff", (
        um.UrbanMapper().imputer
        .with_type("SimpleGeoImputer")
        .on_columns("dropoff_longitude", "dropoff_latitude")
        .build()
    )),
    # BoundingBoxFilter built with no explicit bounds.
    # NOTE(review): presumably bounded by the urban layer's extent; confirm.
    ("filter", um.UrbanMapper().filter.with_type("BoundingBoxFilter").build()),
    # Count records per pickup segment into `pickup_count`.
    ("enrich_pickups", (
        um.UrbanMapper().enricher
        .with_data(group_by="pickup_segment")
        .count_by(output_column="pickup_count")
        .build()
    )),
    # Count records per dropoff segment into `dropoff_count`.
    ("enrich_dropoffs", (
        um.UrbanMapper().enricher
        .with_data(group_by="dropoff_segment")
        .count_by(output_column="dropoff_count")
        .build()
    )),
    # Interactive visualiser on dark tiles with white colour-bar text.
    ("visualiser", (
        um.UrbanMapper().visual
        .with_type("Interactive")
        .with_style({"tiles": "CartoDB dark_matter", "colorbar_text_color": "white"})
        .build()
    ))
])
--------------------------------------------------------------------------- ModuleNotFoundError Traceback (most recent call last) Cell In[2], line 1 ----> 1 import urban_mapper as um 2 from urban_mapper.pipeline import UrbanPipeline 4 # Define the pipeline File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/__init__.py:3 1 from loguru import logger ----> 3 from .mixins import ( 4 LoaderMixin, 5 EnricherMixin, 6 VisualMixin, 7 TableVisMixin, 8 AuctusSearchMixin, 9 PipelineGeneratorMixin, 10 UrbanPipelineMixin, 11 ) 12 from .modules import ( 13 LoaderBase, 14 CSVLoader, (...) 30 PipelineGeneratorFactory, 31 ) 33 from .urban_mapper import UrbanMapper File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/mixins/__init__.py:1 ----> 1 from .loader import LoaderMixin 2 from .enricher import EnricherMixin 3 from .visual import VisualMixin File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/mixins/loader.py:1 ----> 1 from urban_mapper.modules.loader.loader_factory import LoaderFactory 4 class LoaderMixin(LoaderFactory): 5 def __init__(self): File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/modules/__init__.py:1 ----> 1 from .loader import LoaderBase, CSVLoader, ShapefileLoader, ParquetLoader 2 from .imputer import ( 3 GeoImputerBase, 4 SimpleGeoImputer, 5 AddressGeoImputer, 6 ) 7 from .filter import ( 8 GeoFilterBase, 9 BoundingBoxFilter, 10 ) File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/modules/loader/__init__.py:3 1 from .abc_loader import LoaderBase 2 from .loaders import CSVLoader, ShapefileLoader, ParquetLoader ----> 3 from .loader_factory import LoaderFactory 5 __all__ = [ 6 "LoaderBase", 7 "CSVLoader", (...) 
10 "LoaderFactory", 11 ] File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/modules/loader/loader_factory.py:19 17 from urban_mapper.modules.loader.loaders.csv_loader import CSVLoader 18 from urban_mapper.modules.loader.loaders.parquet_loader import ParquetLoader ---> 19 from urban_mapper.modules.loader.loaders.raster_loader import RasterLoader # Importing RasterLoader of the new raster loader module 20 from urban_mapper.modules.loader.loaders.shapefile_loader import ShapefileLoader 21 from urban_mapper.utils import require_attributes File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/modules/loader/loaders/raster_loader.py:2 1 from ..abc_loader import LoaderBase ----> 2 import rasterio 3 from typing import Any 4 import numpy as np ModuleNotFoundError: No module named 'rasterio'
In [3]:
Copied!
# Execute the pipeline once. compose_transform() returns a pair, unpacked here
# as `mapped_data` and `enriched_layer`.
# NOTE(review): presumably the records with their segment mappings and the
# urban layer carrying the count columns; confirm against the UrbanPipeline docs.
mapped_data, enriched_layer = pipeline.compose_transform()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[3], line 2 1 # Execute the pipeline ----> 2 mapped_data, enriched_layer = pipeline.compose_transform() NameError: name 'pipeline' is not defined
In [4]:
Copied!
# Visualise the two count columns produced by the pipeline.
fig = pipeline.visualise(["pickup_count", "dropoff_count"])
# Bare last expression so the notebook renders the figure via its rich repr.
fig
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[4], line 2 1 # Visualize results ----> 2 fig = pipeline.visualise(["pickup_count", "dropoff_count"]) 3 fig NameError: name 'pipeline' is not defined
In [5]:
Copied!
# Save the pipeline to ./taxi_pipeline.dill in the working directory.
# NOTE(review): the extension suggests dill serialisation; only load such
# files from trusted sources.
pipeline.save("./taxi_pipeline.dill")
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[5], line 2 1 # Save the pipeline ----> 2 pipeline.save("./taxi_pipeline.dill") NameError: name 'pipeline' is not defined