EU Restaurants Study – Easy Pipeline

Data Source:

https://www.kaggle.com/datasets/stefanoleone992/tripadvisor-european-restaurants

In [ ]:

Copied!





#####################################################################################

# ⚠️ INFORMATION ABOUT THE CURRENT CELL ⚠️
# The raw CSV needs some wrangling before it can be used downstream, so it
# is loaded "manually" here and written back out as a pre-processed
# Parquet version of the dataset.

#####################################################################################

from urban_mapper import CSVLoader
import urban_mapper

file_path = "./tripadvisor_european_restaurants.csv"

# Load the CSV through the loader's internal hook and renumber the rows
# with a fresh 0..n-1 index.
df = (
    CSVLoader(file_path, latitude_column="latitude", longitude_column="longitude")
    ._load_data_from_file()
    .reset_index(drop=True)
)

# Keep only the first occurrence of every column name.
df = df.loc[:, ~df.columns.duplicated()]

# Sanity check: after the reset above this count is expected to be 0.
print(f"df Duplicated Indexs: {df.index.duplicated().sum()}")

# Persist the cleaned table; the pipeline's loader step reads this file.
df.to_parquet("./tripadvisor_european_restaurants.parquet")

mapper = urban_mapper.UrbanMapper()
mapper.table_vis.interactive_display(df)
#####################################################################################

# ⚠️ INFORMATION ABOUT THE CURRENT CELL ⚠️
# The raw CSV needs some wrangling before it can be used downstream, so it
# is loaded "manually" here and written back out as a pre-processed
# Parquet version of the dataset.

#####################################################################################

from urban_mapper import CSVLoader
import urban_mapper

file_path = "./tripadvisor_european_restaurants.csv"

# Load the CSV through the loader's internal hook and renumber the rows
# with a fresh 0..n-1 index.
df = (
    CSVLoader(file_path, latitude_column="latitude", longitude_column="longitude")
    ._load_data_from_file()
    .reset_index(drop=True)
)

# Keep only the first occurrence of every column name.
df = df.loc[:, ~df.columns.duplicated()]

# Sanity check: after the reset above this count is expected to be 0.
print(f"df Duplicated Indexs: {df.index.duplicated().sum()}")

# Persist the cleaned table; the pipeline's loader step reads this file.
df.to_parquet("./tripadvisor_european_restaurants.parquet")

mapper = urban_mapper.UrbanMapper()
mapper.table_vis.interactive_display(df)

In [ ]:

Copied!





import pandas as pd
from typing import Optional

def no_yes_prop(series: pd.Series) -> Optional[float]:
    """Return the share of ``'Y'`` answers among the valid ``'Y'``/``'N'`` entries.

    Matching is case-insensitive. Missing values and anything that is not
    ``'Y'`` or ``'N'`` are ignored, so they neither count as "no" nor
    inflate the denominator.

    Args:
        series: Yes/no flags, typically one group of a ``groupby``.

    Returns:
        A proportion in ``[0, 1]``, or ``None`` when the series holds no
        valid ``'Y'``/``'N'`` entry at all.
    """
    answers = series.dropna()
    if answers.empty:
        return None

    # Cast to str first: the ``.str`` accessor raises on non-string dtypes
    # (e.g. a numeric column), whereas such values should simply be
    # treated as invalid answers.
    answers = answers.astype(str).str.upper()
    answers = answers[answers.isin(["Y", "N"])]
    if answers.empty:
        return None

    return float((answers == "Y").mean())

def most_frequent_city(series: pd.Series) -> Optional[str]:
    """Return the most common string value of ``series``.

    Only non-null entries that are ``str`` instances take part in the
    vote. When several values tie, the first one of the (sorted) result of
    ``Series.mode`` is returned.

    Args:
        series: City names, typically one group of a ``groupby``.

    Returns:
        The winning city name, or ``None`` when the series is empty or
        contains no string value.
    """
    if series.empty:
        return None

    present = series.notna()
    textual = series.apply(lambda value: isinstance(value, str))
    candidates = series[present & textual]
    if candidates.empty:
        return None

    winners = candidates.mode()
    if winners.empty:
        return None
    return winners.iloc[0]
import pandas as pd
from typing import Optional

def no_yes_prop(series: pd.Series) -> Optional[float]:
    """Return the share of ``'Y'`` answers among the valid ``'Y'``/``'N'`` entries.

    Matching is case-insensitive. Missing values and anything that is not
    ``'Y'`` or ``'N'`` are ignored, so they neither count as "no" nor
    inflate the denominator.

    Args:
        series: Yes/no flags, typically one group of a ``groupby``.

    Returns:
        A proportion in ``[0, 1]``, or ``None`` when the series holds no
        valid ``'Y'``/``'N'`` entry at all.
    """
    answers = series.dropna()
    if answers.empty:
        return None

    # Cast to str first: the ``.str`` accessor raises on non-string dtypes
    # (e.g. a numeric column), whereas such values should simply be
    # treated as invalid answers.
    answers = answers.astype(str).str.upper()
    answers = answers[answers.isin(["Y", "N"])]
    if answers.empty:
        return None

    return float((answers == "Y").mean())

def most_frequent_city(series: pd.Series) -> Optional[str]:
    """Return the most common string value of ``series``.

    Only non-null entries that are ``str`` instances take part in the
    vote. When several values tie, the first one of the (sorted) result of
    ``Series.mode`` is returned.

    Args:
        series: City names, typically one group of a ``groupby``.

    Returns:
        The winning city name, or ``None`` when the series is empty or
        contains no string value.
    """
    if series.empty:
        return None

    present = series.notna()
    textual = series.apply(lambda value: isinstance(value, str))
    candidates = series[present & textual]
    if candidates.empty:
        return None

    winners = candidates.mode()
    if winners.empty:
        return None
    return winners.iloc[0]

In [ ]:

Copied!





from urban_mapper.pipeline import UrbanPipeline
import urban_mapper as um

def _aggregate_by_country(values_from, method, output_column):
    """Build an enricher aggregating ``values_from`` per ``nearest_country``.

    ``method`` is forwarded unchanged to ``aggregate_by`` (a method name
    such as ``"mean"`` or a callable); the result is stored under
    ``output_column``.
    """
    return (
        um.UrbanMapper().enricher
        .with_data(group_by="nearest_country", values_from=values_from)
        .aggregate_by(method=method, output_column=output_column)
        .build()
    )


# Steps are listed, and therefore built, in the order they are declared.
pipeline = UrbanPipeline([
    # Custom layer read from the Europe GeoJSON file; the mapping result is
    # written to the "nearest_country" column used by every enricher below.
    (
        "urban_layer",
        um.UrbanMapper().urban_layer
        .with_type("custom_urban_layer")
        .from_file("./Europe GeoJSON.geojson")
        .with_mapping(
            longitude_column="temporary_longitude",
            latitude_column="temporary_latitude",
            output_column="nearest_country",
        )
        .build(),
    ),
    # Restaurant records from the pre-processed Parquet file.
    (
        "loader",
        um.UrbanMapper().loader
        .from_file("./tripadvisor_european_restaurants.parquet")
        .with_columns(longitude_column="longitude", latitude_column="latitude")
        .build(),
    ),
    # NOTE(review): presumably handles rows with missing coordinates —
    # confirm against the SimpleGeoImputer documentation.
    (
        "impute",
        um.UrbanMapper().imputer
        .with_type("SimpleGeoImputer")
        .on_columns("longitude", "latitude")
        .build(),
    ),
    (
        "filter",
        um.UrbanMapper().filter.with_type("BoundingBoxFilter").build(),
    ),
    # Number of restaurants per country.
    (
        "enrich_restaurants_count",
        um.UrbanMapper().enricher
        .with_data(group_by="nearest_country")
        .count_by(output_column="restaurants_count")
        .build(),
    ),
    # Dietary flags, aggregated with the custom no_yes_prop function.
    (
        "enrich_vegetarian_friendly",
        _aggregate_by_country("vegetarian_friendly", no_yes_prop, "vegetarian_prop"),
    ),
    (
        "enrich_vegan_options",
        _aggregate_by_country("vegan_options", no_yes_prop, "vegan_options_prop"),
    ),
    (
        "enrich_gluten-free",
        _aggregate_by_country("gluten_free", no_yes_prop, "gluten_free_prop"),
    ),
    # Per-country means of numeric columns.
    (
        "enrich_open_days_per_week",
        _aggregate_by_country("open_days_per_week", "mean", "open_days_per_week_avg"),
    ),
    (
        "enrich_avg_rating",
        _aggregate_by_country("avg_rating", "mean", "overall_avg_rating"),
    ),
    (
        "enrich_total_reviews_per_count",
        _aggregate_by_country("total_reviews_count", "mean", "total_reviews_count_avg"),
    ),
    # City column, aggregated with the custom most_frequent_city function.
    (
        "enrich_most_frequent_city",
        _aggregate_by_country("city", most_frequent_city, "most_frequent_city"),
    ),
    # Interactive map on a dark basemap with white colour-bar text.
    (
        "visualiser",
        um.UrbanMapper().visual
        .with_type("Interactive")
        .with_style({
            "tiles": "CartoDB dark_matter",
            "colorbar_text_color": "white",
        })
        .build(),
    ),
])
from urban_mapper.pipeline import UrbanPipeline
import urban_mapper as um

def _aggregate_by_country(values_from, method, output_column):
    """Build an enricher aggregating ``values_from`` per ``nearest_country``.

    ``method`` is forwarded unchanged to ``aggregate_by`` (a method name
    such as ``"mean"`` or a callable); the result is stored under
    ``output_column``.
    """
    return (
        um.UrbanMapper().enricher
        .with_data(group_by="nearest_country", values_from=values_from)
        .aggregate_by(method=method, output_column=output_column)
        .build()
    )


# Steps are listed, and therefore built, in the order they are declared.
pipeline = UrbanPipeline([
    # Custom layer read from the Europe GeoJSON file; the mapping result is
    # written to the "nearest_country" column used by every enricher below.
    (
        "urban_layer",
        um.UrbanMapper().urban_layer
        .with_type("custom_urban_layer")
        .from_file("./Europe GeoJSON.geojson")
        .with_mapping(
            longitude_column="temporary_longitude",
            latitude_column="temporary_latitude",
            output_column="nearest_country",
        )
        .build(),
    ),
    # Restaurant records from the pre-processed Parquet file.
    (
        "loader",
        um.UrbanMapper().loader
        .from_file("./tripadvisor_european_restaurants.parquet")
        .with_columns(longitude_column="longitude", latitude_column="latitude")
        .build(),
    ),
    # NOTE(review): presumably handles rows with missing coordinates —
    # confirm against the SimpleGeoImputer documentation.
    (
        "impute",
        um.UrbanMapper().imputer
        .with_type("SimpleGeoImputer")
        .on_columns("longitude", "latitude")
        .build(),
    ),
    (
        "filter",
        um.UrbanMapper().filter.with_type("BoundingBoxFilter").build(),
    ),
    # Number of restaurants per country.
    (
        "enrich_restaurants_count",
        um.UrbanMapper().enricher
        .with_data(group_by="nearest_country")
        .count_by(output_column="restaurants_count")
        .build(),
    ),
    # Dietary flags, aggregated with the custom no_yes_prop function.
    (
        "enrich_vegetarian_friendly",
        _aggregate_by_country("vegetarian_friendly", no_yes_prop, "vegetarian_prop"),
    ),
    (
        "enrich_vegan_options",
        _aggregate_by_country("vegan_options", no_yes_prop, "vegan_options_prop"),
    ),
    (
        "enrich_gluten-free",
        _aggregate_by_country("gluten_free", no_yes_prop, "gluten_free_prop"),
    ),
    # Per-country means of numeric columns.
    (
        "enrich_open_days_per_week",
        _aggregate_by_country("open_days_per_week", "mean", "open_days_per_week_avg"),
    ),
    (
        "enrich_avg_rating",
        _aggregate_by_country("avg_rating", "mean", "overall_avg_rating"),
    ),
    (
        "enrich_total_reviews_per_count",
        _aggregate_by_country("total_reviews_count", "mean", "total_reviews_count_avg"),
    ),
    # City column, aggregated with the custom most_frequent_city function.
    (
        "enrich_most_frequent_city",
        _aggregate_by_country("city", most_frequent_city, "most_frequent_city"),
    ),
    # Interactive map on a dark basemap with white colour-bar text.
    (
        "visualiser",
        um.UrbanMapper().visual
        .with_type("Interactive")
        .with_style({
            "tiles": "CartoDB dark_matter",
            "colorbar_text_color": "white",
        })
        .build(),
    ),
])

In [ ]:

Copied!

# Execute the pipeline: compose_transform() returns a pair, unpacked here.
# NOTE(review): presumably (mapped input data, enriched urban layer) —
# confirm against the UrbanPipeline documentation.
mapped_data, enriched_layer = pipeline.compose_transform()
# Execute the pipeline: compose_transform() returns a pair, unpacked here.
# NOTE(review): presumably (mapped input data, enriched urban layer) —
# confirm against the UrbanPipeline documentation.
mapped_data, enriched_layer = pipeline.compose_transform()

In [ ]:

Copied!





# Visualise the enriched metrics.
# The names below are the output_column values declared by the pipeline's
# enricher steps.
fig = pipeline.visualise([
    "restaurants_count",
    "vegetarian_prop",
    "vegan_options_prop",
    "gluten_free_prop",
    "open_days_per_week_avg",
    "overall_avg_rating",
    "total_reviews_count_avg",
    "most_frequent_city"
])

# Bare expression as the last statement so the notebook cell renders it.
fig
# Visualise the enriched metrics.
# The names below are the output_column values declared by the pipeline's
# enricher steps.
fig = pipeline.visualise([
    "restaurants_count",
    "vegetarian_prop",
    "vegan_options_prop",
    "gluten_free_prop",
    "open_days_per_week_avg",
    "overall_avg_rating",
    "total_reviews_count_avg",
    "most_frequent_city"
])

# Bare expression as the last statement so the notebook cell renders it.
fig

In [ ]:

Copied!

# Save the pipeline to a file in the current working directory.
# NOTE(review): the ".dill" extension suggests dill serialisation — confirm
# against the UrbanPipeline.save documentation.
pipeline.save("./EU_restaurant_counts.dill")
# Save the pipeline to a file in the current working directory.
# NOTE(review): the ".dill" extension suggests dill serialisation — confirm
# against the UrbanPipeline.save documentation.
pipeline.save("./EU_restaurant_counts.dill")

In [ ]:

Copied!





# Export the pipeline to JupyterGIS for collaborative exploration.
# The output path is relative to the current working directory;
# urban_layer_name is the label passed along for the exported layer.
pipeline.to_jgis(
    filepath="EU_restaurant_counts.JGIS",
    urban_layer_name="European Union Restaurants Analysis",
)
# Export the pipeline to JupyterGIS for collaborative exploration.
# The output path is relative to the current working directory;
# urban_layer_name is the label passed along for the exported layer.
pipeline.to_jgis(
    filepath="EU_restaurant_counts.JGIS",
    urban_layer_name="European Union Restaurants Analysis",
)

In [ ]: