Iceberg V2: Fleet Telemetry Analytics

Read a native Iceberg v2 table (450 fleet GPS pings) and validate enhanced per-column manifest stats with safety, fuel, and speeding analytics, no conversion to Delta.

Category: iceberg

Syntax

-- ============================================================================
-- SETUP
-- ============================================================================
-- Registers an external table backed by a native Apache Iceberg v2 table.
-- DeltaForge reads the Iceberg metadata chain directly:
-- metadata.json -> manifest list -> manifests -> Parquet data files.
-- V2 adds enhanced per-column statistics for more effective predicate pushdown.

-- STEP 1: Zone & Schema
CREATE ZONE IF NOT EXISTS {{zone_name}} TYPE EXTERNAL
    COMMENT 'External tables - demo datasets and file-backed data';

CREATE SCHEMA IF NOT EXISTS {{zone_name}}.iceberg_demos
    COMMENT 'Apache Iceberg native table demos';

-- STEP 2: Register the Iceberg v2 table.
-- LOCATION is the table root. DeltaForge parses metadata.json to discover
-- schema and data files. format-version = 2.
CREATE EXTERNAL TABLE IF NOT EXISTS {{zone_name}}.iceberg_demos.fleet_telemetry
USING ICEBERG
LOCATION '{{data_path}}/fleet_telemetry';

GRANT ADMIN ON TABLE {{zone_name}}.iceberg_demos.fleet_telemetry TO USER {{current_user}};


-- ============================================================================
-- QUERIES
-- ============================================================================

-- Query 1: Baseline row count - verifies the v2 manifest chain resolved.
ASSERT ROW_COUNT = 450
SELECT * FROM {{zone_name}}.iceberg_demos.fleet_telemetry;

-- Query 2: Full schema projection - all 13 columns including boolean.
ASSERT ROW_COUNT = 450
ASSERT VALUE fleet = 'West-Coast'   WHERE vehicle_id = 'VH-0001'
ASSERT VALUE vehicle_type = 'Delivery-Van' WHERE vehicle_id = 'VH-0001'
ASSERT VALUE driver_id = 'DRV-141' WHERE vehicle_id = 'VH-0001'
SELECT
    vehicle_id, fleet, vehicle_type, driver_id,
    latitude, longitude, speed_mph, fuel_level_pct,
    engine_temp_f, odometer_miles, idle_minutes,
    harsh_braking, route_id
FROM {{zone_name}}.iceberg_demos.fleet_telemetry
ORDER BY vehicle_id;

-- Query 3: Per-fleet row counts (3 regional fleets).
ASSERT ROW_COUNT = 3
ASSERT VALUE ping_count = 150 WHERE fleet = 'East-Coast'
ASSERT VALUE ping_count = 150 WHERE fleet = 'Midwest'
ASSERT VALUE ping_count = 150 WHERE fleet = 'West-Coast'
SELECT fleet, COUNT(*) AS ping_count
FROM {{zone_name}}.iceberg_demos.fleet_telemetry
GROUP BY fleet
ORDER BY fleet;

-- Query 4: Vehicle type distribution.
ASSERT ROW_COUNT = 3
ASSERT VALUE vehicle_count = 132 WHERE vehicle_type = 'Box-Truck'
ASSERT VALUE vehicle_count = 156 WHERE vehicle_type = 'Delivery-Van'
ASSERT VALUE vehicle_count = 162 WHERE vehicle_type = 'Semi-Truck'
SELECT vehicle_type, COUNT(*) AS vehicle_count
FROM {{zone_name}}.iceberg_demos.fleet_telemetry
GROUP BY vehicle_type
ORDER BY vehicle_type;

-- Query 5: Average speed by fleet (float aggregation).
ASSERT ROW_COUNT = 3
ASSERT VALUE avg_speed = 37.27 WHERE fleet = 'East-Coast'
ASSERT VALUE avg_speed = 37.89 WHERE fleet = 'Midwest'
ASSERT VALUE avg_speed = 37.13 WHERE fleet = 'West-Coast'
SELECT fleet, ROUND(AVG(speed_mph), 2) AS avg_speed
FROM {{zone_name}}.iceberg_demos.fleet_telemetry
GROUP BY fleet
ORDER BY fleet;

-- Query 6: Total idle minutes by fleet (fuel-waste proxy).
ASSERT ROW_COUNT = 3
ASSERT VALUE total_idle = 3355 WHERE fleet = 'East-Coast'
ASSERT VALUE total_idle = 3439 WHERE fleet = 'Midwest'
ASSERT VALUE total_idle = 3751 WHERE fleet = 'West-Coast'
SELECT fleet, SUM(idle_minutes) AS total_idle
FROM {{zone_name}}.iceberg_demos.fleet_telemetry
GROUP BY fleet
ORDER BY fleet;

-- Query 7: Harsh braking events per fleet (boolean aggregation).
ASSERT ROW_COUNT = 3
ASSERT VALUE harsh_events = 27 WHERE fleet = 'East-Coast'
ASSERT VALUE harsh_events = 20 WHERE fleet = 'Midwest'
ASSERT VALUE harsh_events = 31 WHERE fleet = 'West-Coast'
SELECT fleet, SUM(CASE WHEN harsh_braking THEN 1 ELSE 0 END) AS harsh_events
FROM {{zone_name}}.iceberg_demos.fleet_telemetry
GROUP BY fleet
ORDER BY fleet;

-- Query 8: Speeding vehicles (> 65 mph).
ASSERT ROW_COUNT = 53
SELECT vehicle_id, fleet, vehicle_type, speed_mph, route_id
FROM {{zone_name}}.iceberg_demos.fleet_telemetry
WHERE speed_mph > 65
ORDER BY speed_mph DESC;

-- Query 9: Low fuel alerts (< 20%).
ASSERT ROW_COUNT = 74
SELECT vehicle_id, fleet, fuel_level_pct, route_id
FROM {{zone_name}}.iceberg_demos.fleet_telemetry
WHERE fuel_level_pct < 20
ORDER BY fuel_level_pct ASC;

-- Query 10: High engine temperature (> 220 F).
ASSERT ROW_COUNT = 157
SELECT vehicle_id, fleet, vehicle_type, engine_temp_f, speed_mph
FROM {{zone_name}}.iceberg_demos.fleet_telemetry
WHERE engine_temp_f > 220
ORDER BY engine_temp_f DESC;

-- Query 11: Distinct counts (COUNT DISTINCT over 3 columns).
ASSERT ROW_COUNT = 1
ASSERT VALUE distinct_vehicles = 450
ASSERT VALUE distinct_drivers = 98
ASSERT VALUE distinct_routes = 15
SELECT
    COUNT(DISTINCT vehicle_id) AS distinct_vehicles,
    COUNT(DISTINCT driver_id)  AS distinct_drivers,
    COUNT(DISTINCT route_id)   AS distinct_routes
FROM {{zone_name}}.iceberg_demos.fleet_telemetry;

-- VERIFY: cross-cutting sanity check.
ASSERT ROW_COUNT = 1
ASSERT VALUE total_rows = 450
ASSERT VALUE fleet_count = 3
ASSERT VALUE total_harsh_braking = 78
ASSERT VALUE speeding_count = 53
ASSERT VALUE low_fuel_count = 74
ASSERT VALUE distinct_vehicles = 450
ASSERT VALUE distinct_routes = 15
SELECT
    COUNT(*) AS total_rows,
    COUNT(DISTINCT fleet) AS fleet_count,
    SUM(CASE WHEN harsh_braking THEN 1 ELSE 0 END) AS total_harsh_braking,
    SUM(CASE WHEN speed_mph > 65 THEN 1 ELSE 0 END) AS speeding_count,
    SUM(CASE WHEN fuel_level_pct < 20 THEN 1 ELSE 0 END) AS low_fuel_count,
    COUNT(DISTINCT vehicle_id) AS distinct_vehicles,
    COUNT(DISTINCT route_id) AS distinct_routes
FROM {{zone_name}}.iceberg_demos.fleet_telemetry;


-- ============================================================================
-- CLEANUP
-- ============================================================================

DROP EXTERNAL TABLE IF EXISTS {{zone_name}}.iceberg_demos.fleet_telemetry WITH FILES;
DROP SCHEMA IF EXISTS {{zone_name}}.iceberg_demos;
DROP ZONE IF EXISTS {{zone_name}};

Description

## When to Use Pick this demo when you need to exercise DeltaForge's **Iceberg format-version 2** reader, the format that most Spark/Trino lakehouses produce today. V2 keeps the v1 metadata chain but strengthens it with richer per-column statistics (value counts, null counts, lower/upper bounds) embedded in manifest entries. That lets query planners skip files on predicate pushdown without opening the Parquet footer. This demo is a fleet telemetry workload: safety events, fuel alerts, and speeding, queries that would benefit most from file-level skipping. ## What You Will Learn 1. How `CREATE EXTERNAL TABLE ... USING ICEBERG` auto-registers a v2 table from just a `LOCATION`. 2. How Iceberg v2 manifests carry per-column min/max/null counts and how DeltaForge uses them during planning. 3. Reading booleans out of Iceberg correctly (`harsh_braking`) including `CASE WHEN col THEN 1 ELSE 0 END` aggregation. 4. Mixed-type aggregations: `AVG(speed_mph)`, `SUM(idle_minutes)`, `COUNT(DISTINCT route_id)`. 5. Predicate pushdown on numeric columns (speeding, low fuel, high engine temp). 6. ASSERT-based regression pinning across 11 analytic queries plus a cross-cutting VERIFY. 7. Why v2 is the practical target version for interop, v1 is legacy, v3 is newer than most engines read. ## Prerequisites Provide a pre-materialized Iceberg v2 table at `{{data_path}}/fleet_telemetry` with the standard `metadata/` and `data/` subdirectories. The dataset is 450 rows, 13 columns (`vehicle_id`, `fleet`, `vehicle_type`, `driver_id`, `latitude`, `longitude`, `speed_mph`, `fuel_level_pct`, `engine_temp_f`, `odometer_miles`, `idle_minutes`, `harsh_braking` boolean, `route_id`) split across three regional fleets. No Delta conversion, no write path, no external catalog service required.

Pitfalls

See Also

Open in interactive docs →   DeltaForge home →