Ride-share geofencing across 12 pricing zones and 1,000,000 driver GPS positions in 8 world cities, converts O(n × m) polygon intersection into O(1) integer-equality joins via H3 cell matching.
-- ============================================================================
-- SETUP, zone, schema, 2 Delta tables + 2 views
-- ============================================================================
CREATE ZONE IF NOT EXISTS {{zone_name}} TYPE EXTERNAL
COMMENT 'External and Delta tables, demo datasets';
CREATE SCHEMA IF NOT EXISTS {{zone_name}}.spatial_demos
COMMENT 'H3 spatial indexing and geographic analysis tables';
CREATE DELTA TABLE IF NOT EXISTS {{zone_name}}.spatial_demos.zones (
zone_id INT, zone_name VARCHAR, zone_type VARCHAR,
city VARCHAR, country VARCHAR, polygon_wkt VARCHAR, surcharge_pct DOUBLE
) LOCATION '{{data_path}}/pip_zones';
CREATE DELTA TABLE IF NOT EXISTS {{zone_name}}.spatial_demos.driver_positions (
id BIGINT, lat DOUBLE, lng DOUBLE, driver_id VARCHAR, city VARCHAR
) LOCATION '{{data_path}}/pip_driver_positions';
-- 1,000,000 drivers across 8 city clusters + global scatter
INSERT INTO {{zone_name}}.spatial_demos.driver_positions
SELECT id,
37.58 + (0.24 * ((CAST(id AS DOUBLE) * 0.618033988749895) % 1.0)) AS lat,
-122.52 + (0.18 * ((CAST(id AS DOUBLE) * 0.381966011250105) % 1.0)) AS lng,
'driver_sf_' || (id % 200) AS driver_id,
'San Francisco' AS city
FROM generate_series(1, 150000) AS t(id);
-- (repeat for NYC, Paris, London, Tokyo at 150K each; Sydney, LA at 100K; global 50K)
CREATE OR REPLACE VIEW {{zone_name}}.spatial_demos.driver_cells AS
SELECT id, lat, lng, driver_id, city,
h3_latlng_to_cell(lat, lng, 9) AS h3_cell
FROM {{zone_name}}.spatial_demos.driver_positions;
CREATE OR REPLACE VIEW {{zone_name}}.spatial_demos.zone_cells AS
SELECT zone_id, zone_name, zone_type, city, country, surcharge_pct,
UNNEST(h3_polyfill(polygon_wkt, 9)) AS h3_cell
FROM {{zone_name}}.spatial_demos.zones;
-- ============================================================================
-- QUERIES, progressive (H3 intro → 1M join → dashboard analytics)
-- ============================================================================
-- What is an H3 cell?
SELECT h3_latlng_to_cell(37.6213, -122.3790, 9) AS sfo_cell_id,
h3_cell_to_string(h3_latlng_to_cell(37.6213, -122.3790, 9)) AS sfo_hex_string;
-- Cell size at resolution 9
SELECT 9 AS resolution,
ROUND(h3_cell_area (h3_latlng_to_cell(37.6213, -122.3790, 9)), 0) AS area_m2,
ROUND(h3_edge_length(h3_latlng_to_cell(37.6213, -122.3790, 9)), 0) AS edge_length_m;
-- Polyfill cell count per zone
SELECT z.zone_name, z.zone_type, COUNT(*) AS h3_cells
FROM {{zone_name}}.spatial_demos.zone_cells z
GROUP BY z.zone_name, z.zone_type
ORDER BY h3_cells DESC;
-- The million-row spatial join
SELECT z.zone_name, z.zone_type, z.city, z.surcharge_pct,
COUNT(*) AS drivers_in_zone
FROM {{zone_name}}.spatial_demos.driver_cells d
INNER JOIN {{zone_name}}.spatial_demos.zone_cells z ON d.h3_cell = z.h3_cell
GROUP BY z.zone_name, z.zone_type, z.city, z.surcharge_pct
ORDER BY drivers_in_zone DESC;
-- Drivers outside every zone (NOT EXISTS)
SELECT COUNT(*) AS drivers_outside_zones
FROM {{zone_name}}.spatial_demos.driver_cells d
WHERE NOT EXISTS (SELECT 1 FROM {{zone_name}}.spatial_demos.zone_cells z
WHERE d.h3_cell = z.h3_cell);
-- Single-point lookup, 'is this coord inside a pricing zone?'
SELECT 'SFO Airport' AS test_point, z.zone_name AS matched_zone, z.surcharge_pct
FROM {{zone_name}}.spatial_demos.zone_cells z
WHERE z.h3_cell = h3_latlng_to_cell(37.6213, -122.3790, 9)
UNION ALL
SELECT 'Times Square', z.zone_name, z.surcharge_pct
FROM {{zone_name}}.spatial_demos.zone_cells z
WHERE z.h3_cell = h3_latlng_to_cell(40.758, -73.9855, 9);
-- Airport vs downtown density (drivers per H3 cell)
SELECT z.zone_type,
COUNT(DISTINCT z.zone_name) AS num_zones,
SUM(zc.cell_count) AS total_h3_cells,
SUM(driver_count) AS total_drivers,
ROUND(SUM(driver_count) * 1.0
/ SUM(zc.cell_count), 1) AS drivers_per_cell
FROM (SELECT zone_name, zone_type, COUNT(*) AS driver_count
FROM {{zone_name}}.spatial_demos.driver_cells d
INNER JOIN {{zone_name}}.spatial_demos.zone_cells z ON d.h3_cell = z.h3_cell
GROUP BY zone_name, zone_type) z
INNER JOIN (SELECT zone_name, COUNT(*) AS cell_count
FROM {{zone_name}}.spatial_demos.zone_cells
GROUP BY zone_name) zc ON z.zone_name = zc.zone_name
GROUP BY z.zone_type
ORDER BY z.zone_type;
-- ============================================================================
-- CLEANUP
-- ============================================================================
DROP VIEW IF EXISTS {{zone_name}}.spatial_demos.zone_cells;
DROP VIEW IF EXISTS {{zone_name}}.spatial_demos.driver_cells;
DROP DELTA TABLE IF EXISTS {{zone_name}}.spatial_demos.driver_positions WITH FILES;
DROP DELTA TABLE IF EXISTS {{zone_name}}.spatial_demos.zones WITH FILES;
DROP SCHEMA IF EXISTS {{zone_name}}.spatial_demos;
DROP ZONE IF EXISTS {{zone_name}};
## When to Use Reach for this demo when you need to assign each of a very large number of points to one of a small number of polygons, ride-share surge pricing, delivery dispatch, IoT fleet geofencing. H3 rewrites point-in-polygon as an integer hash join: every driver position maps to one H3 cell ID (one function call), every zone polygon is pre-expanded to its covering cells via h3_polyfill, and the join becomes `ON driver_cells.h3_cell = zone_cells.h3_cell`, a hash join DataFusion already runs in O(1) per row. ## What You Will Learn 1. How to generate 1,000,000 deterministic GPS pings with `generate_series` and a golden-ratio quasi-random distribution across 8 city bounding boxes + a global scatter cluster 2. How to pre-expand zone polygons into H3 coverage with `h3_polyfill(polygon_wkt, 9)` (~107 cells for an airport zone, ~129 cells for a downtown zone at res 9) 3. How to run the million-row spatial join with a plain INNER JOIN on an integer column, no trigonometry, no ray-casting 4. How to detect 'outside every zone' drivers with NOT EXISTS on the zone_cells view 5. How to aggregate matched drivers per city, zone type, and surcharge tier in a single dashboard query 6. How to catch a systematic data-generation artifact: the golden-ratio pair φ and 1-φ produces lat/lng fractional parts that sum to exactly 1, so any zone requiring mid-range lat AND mid-range lng simultaneously receives zero matches (CDG, Heathrow, Narita, etc. in this demo) ## Prerequisites - DeltaForge GUI running with a workspace open - Write access to an EXTERNAL zone and permission to CREATE SCHEMA and VIEW - Variables: `{{zone_name}}` (defaults to `external`), `{{data_path}}` for two Delta tables (`zones`, `driver_positions`) - ~1 GB free disk for the 1M-row Delta table and the view materialization