Filter¶
In this notebook, we’ll zoom in on the important bits of your data, making sure that only the data points within the bounds of your freshly queried urban_layer
remain!
Data source used:
- PLUTO data from NYC Open Data. https://www.nyc.gov/content/planning/pages/resources/datasets/mappluto-pluto-change
import urban_mapper as um
# Create the UrbanMapper instance; the cells below reach the loader,
# urban_layer and filter modules through it.
mapper = um.UrbanMapper()
--------------------------------------------------------------------------- ModuleNotFoundError Traceback (most recent call last) Cell In[1], line 1 ----> 1 import urban_mapper as um 3 # Get UrbanMapper rolling 4 mapper = um.UrbanMapper() File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/__init__.py:3 1 from loguru import logger ----> 3 from .mixins import ( 4 LoaderMixin, 5 EnricherMixin, 6 VisualMixin, 7 TableVisMixin, 8 AuctusSearchMixin, 9 PipelineGeneratorMixin, 10 UrbanPipelineMixin, 11 ) 12 from .modules import ( 13 LoaderBase, 14 CSVLoader, (...) 30 PipelineGeneratorFactory, 31 ) 33 from .urban_mapper import UrbanMapper File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/mixins/__init__.py:1 ----> 1 from .loader import LoaderMixin 2 from .enricher import EnricherMixin 3 from .visual import VisualMixin File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/mixins/loader.py:1 ----> 1 from urban_mapper.modules.loader.loader_factory import LoaderFactory 4 class LoaderMixin(LoaderFactory): 5 def __init__(self): File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/modules/__init__.py:1 ----> 1 from .loader import LoaderBase, CSVLoader, ShapefileLoader, ParquetLoader 2 from .imputer import ( 3 GeoImputerBase, 4 SimpleGeoImputer, 5 AddressGeoImputer, 6 ) 7 from .filter import ( 8 GeoFilterBase, 9 BoundingBoxFilter, 10 ) File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/modules/loader/__init__.py:3 1 from .abc_loader import LoaderBase 2 from .loaders import CSVLoader, ShapefileLoader, ParquetLoader ----> 3 from .loader_factory import LoaderFactory 5 __all__ = [ 6 "LoaderBase", 7 "CSVLoader", (...) 
10 "LoaderFactory", 11 ] File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/modules/loader/loader_factory.py:19 17 from urban_mapper.modules.loader.loaders.csv_loader import CSVLoader 18 from urban_mapper.modules.loader.loaders.parquet_loader import ParquetLoader ---> 19 from urban_mapper.modules.loader.loaders.raster_loader import RasterLoader # Importing RasterLoader of the new raster loader module 20 from urban_mapper.modules.loader.loaders.shapefile_loader import ShapefileLoader 21 from urban_mapper.utils import require_attributes File ~/checkouts/readthedocs.org/user_builds/urbanmapper/checkouts/70/src/urban_mapper/modules/loader/loaders/raster_loader.py:2 1 from ..abc_loader import LoaderBase ----> 2 import rasterio 3 from typing import Any 4 import numpy as np ModuleNotFoundError: No module named 'rasterio'
Loading Data and Creating a Layer¶
First, let’s load some data and create a layer for, say, Downtown Brooklyn
.
Note that:
- A Loader example is available in
examples/Basics/loader.ipynb
to learn how to load your own data.
- An Urban Layer example is available in
examples/Basics/urban_layer.ipynb
to learn how to query your layer, e.g. the street intersections of Downtown Brooklyn.
# Load data
# Note: for the interactive documentation build we only request 5000 records
# (`number_of_rows=5000`). Feel free to remove that limit for a more realistic analysis.
data = (
mapper
.loader
# From the loader module: read the `oscur/pluto` dataset from the HuggingFace
# OSCUR datasets hub, using `longitude` and `latitude` as the coordinate columns.
.from_huggingface("oscur/pluto", number_of_rows=5000, streaming=True).with_columns("longitude", "latitude").load()
)
# Create the urban layer whose bounds will be used for filtering
layer = (
mapper.urban_layer.with_type("streets_intersections") # From the urban_layer module, with layer type `streets_intersections`
.from_place("Downtown Brooklyn, New York City, USA") # Query the layer by place name
.build()
)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[2], line 4 1 # Load data 2 # Note: For the documentation interactive mode, we only query 5000 records from the dataset. Feel free to remove for a more realistic analysis. 3 data = ( ----> 4 mapper 5 .loader 6 .from_huggingface("oscur/pluto", number_of_rows=5000, streaming=True).with_columns("longitude", "latitude").load() 7 # From the loader module, from the following file within the HuggingFace OSCUR datasets hub and with the `longitude` and `latitude` 8 ) 10 # Create urban layer 11 layer = ( 12 mapper.urban_layer.with_type("streets_intersections") # From the urban_layer module and with type streets_intersections 13 .from_place("Downtown Brooklyn, New York City, USA") # From a place 14 .build() 15 ) NameError: name 'mapper' is not defined
Applying the Filter¶
Now that we've got all the ingredients, let’s use the BoundingBoxFilter
to keep only the data points within our layer’s bounds. It’s like putting a spotlight on Downtown Brooklyn when your data covers the whole of New York City.
# Apply filter
filtered_data = (
mapper
.filter # From the filter module
.with_type("BoundingBoxFilter") # BoundingBoxFilter keeps only the data points that fall within the layer's bounding box
.transform(data, layer) # Filter `data` against the layer queried in the previous cell
)
# Last expression of the cell, so the notebook displays the filtered result
filtered_data
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[3], line 3 1 # Apply filter 2 filtered_data = ( ----> 3 mapper 4 .filter # From the filter module 5 .with_type("BoundingBoxFilter") # With type BoundingBoxFilter which is a filter that filters out your data points based on the bounding box of the layer 6 .transform(data, layer) # Transform the data with the layer previously queried 7 ) 8 filtered_data NameError: name 'mapper' is not defined
Be Able To Preview Your Filter¶
Curious about your filter? Use preview()
to see its setup—super useful when you’re borrowing someone else’s analysis!
# Preview filter: print a description of how the filter is currently set up
print(mapper.filter.preview())
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[4], line 2 1 # Preview filter ----> 2 print(mapper.filter.preview()) NameError: name 'mapper' is not defined
Provide many different datasets to the same filter¶
You can load many datasets and feed them to the filter as a dictionary. In that case, the output will also be a dictionary. See the simple example below.
If you want to apply the filter to one specific dataset of the dictionary, call .with_data(data_id=...)
on the filter.
# Load the first dataset (PLUTO) from HuggingFace
data1 = (
mapper
.loader
# From the loader module: read `oscur/pluto`, limited to 1000 rows, using
# `longitude` and `latitude` as the coordinate columns.
.from_huggingface("oscur/pluto", number_of_rows=1000, streaming=True).with_columns("longitude", "latitude").load()
)
# Load the second dataset (taxi trips) from HuggingFace
data2 = (
mapper
.loader
.from_huggingface("oscur/taxisvis1M", number_of_rows=1000, streaming=True) # Replace with your own dataset id
.with_columns("pickup_longitude", "pickup_latitude").load() # Tell the loader which columns hold longitude and latitude
)
# Group the datasets in a dictionary; the keys are the ids used to look up
# each filtered result afterwards.
data = {
"pluto_data": data1,
"taxi_data": data2,
}
# Apply filter
filtered_data = (
mapper
.filter # From the filter module
.with_type("BoundingBoxFilter") # BoundingBoxFilter keeps only the data points that fall within the layer's bounding box
.transform(data, layer) # Filter every dataset in `data` against the layer queried earlier
)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[5], line 3 1 # Load CSV data 2 data1 = ( ----> 3 mapper 4 .loader 5 .from_huggingface("oscur/pluto", number_of_rows=1000, streaming=True).with_columns("longitude", "latitude").load() 6 # From the loader module, from the following file and with the `longitude` and `latitude` 7 ) 9 # Load Parquet data 10 data2 = ( 11 mapper 12 .loader 13 .from_huggingface("oscur/taxisvis1M", number_of_rows=1000, streaming=True) # To update with your own path 14 .with_columns("pickup_longitude", "pickup_latitude").load() # Inform your long and lat columns 15 ) NameError: name 'mapper' is not defined
# Display the filtered result stored under the "pluto_data" key
filtered_data["pluto_data"]
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[6], line 1 ----> 1 filtered_data["pluto_data"] NameError: name 'filtered_data' is not defined
# Display the filtered result stored under the "taxi_data" key
filtered_data["taxi_data"]
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[7], line 1 ----> 1 filtered_data["taxi_data"] NameError: name 'filtered_data' is not defined
More Geo Filter primitives ?¶
Want more? Come and tell us about it at https://github.com/VIDA-NYU/UrbanMapper/issues/5
Wrapping Up¶
Well done, you star! You’ve filtered your data to focus on what matters. Next stop: try enricher
or visualiser
.