Read multi-carrier proto3 ShippingManifest binaries to exercise bool/int64/float scalars, 3-level nesting (Shipment -> Package -> Dimensions), two enums, and two parallel repeated fields (packages + tracking).
CREATE ZONE IF NOT EXISTS external TYPE EXTERNAL;
CREATE SCHEMA IF NOT EXISTS external.protobuf_freight;
-- Table 1: shipments (12 rows) - packages and tracking joined as CSV
CREATE EXTERNAL TABLE IF NOT EXISTS external.protobuf_freight.shipments
USING PROTOBUF
LOCATION '/data/protobuf/freight'
OPTIONS (
schema_path = '/data/protobuf/freight/schema/freight.proto',
message_name = 'freight.ShippingManifest',
proto_flatten_config = '{
"row_path": "shipments",
"include_paths": [
"shipments.shipment_id", "shipments.origin", "shipments.destination",
"shipments.status", "shipments.is_express", "shipments.is_insured",
"shipments.total_cost_cents",
"shipments.packages.package_id", "shipments.packages.description",
"shipments.tracking.location", "shipments.created_at"
],
"default_repeat_handling": "join_comma",
"decode_enums": true,
"timestamp_format": "iso8601",
"max_depth": 10
}',
file_metadata = '{"columns":["df_file_name","df_file_modified","df_dataset","df_row_number"]}'
);
-- Table 2: shipment_packages (24 rows) - explode packages, pull 3-level Dimensions
CREATE EXTERNAL TABLE IF NOT EXISTS external.protobuf_freight.shipment_packages
USING PROTOBUF
LOCATION '/data/protobuf/freight'
OPTIONS (
schema_path = '/data/protobuf/freight/schema/freight.proto',
message_name = 'freight.ShippingManifest',
proto_flatten_config = '{
"row_path": "shipments",
"explode_paths": ["shipments.packages"],
"include_paths": [
"shipments.shipment_id", "shipments.status", "shipments.is_express",
"shipments.packages.package_id", "shipments.packages.description",
"shipments.packages.weight_kg",
"shipments.packages.dimensions.length_cm",
"shipments.packages.dimensions.width_cm",
"shipments.packages.dimensions.height_cm",
"shipments.packages.package_class",
"shipments.packages.requires_signature",
"shipments.packages.declared_value_cents"
],
"decode_enums": true,
"timestamp_format": "iso8601",
"max_depth": 10
}'
);
-- Table 3: shipment_tracking (39 rows) - explode the second repeated field
CREATE EXTERNAL TABLE IF NOT EXISTS external.protobuf_freight.shipment_tracking
USING PROTOBUF
LOCATION '/data/protobuf/freight'
OPTIONS (
schema_path = '/data/protobuf/freight/schema/freight.proto',
message_name = 'freight.ShippingManifest',
proto_flatten_config = '{
"row_path": "shipments",
"explode_paths": ["shipments.tracking"],
"include_paths": [
"shipments.shipment_id", "shipments.origin", "shipments.destination",
"shipments.status",
"shipments.tracking.event_time",
"shipments.tracking.location",
"shipments.tracking.description"
],
"decode_enums": true,
"timestamp_format": "iso8601"
}'
);
-- Verify
ASSERT ROW_COUNT = 12 SELECT * FROM external.protobuf_freight.shipments;
ASSERT ROW_COUNT = 24 SELECT * FROM external.protobuf_freight.shipment_packages;
ASSERT ROW_COUNT = 39 SELECT * FROM external.protobuf_freight.shipment_tracking;
ASSERT VALUE total_value = 2150000
SELECT SUM(declared_value_cents) AS total_value
FROM external.protobuf_freight.shipment_packages
WHERE declared_value_cents > 100000;
-- Cleanup
DROP EXTERNAL TABLE IF EXISTS external.protobuf_freight.shipment_tracking WITH FILES;
DROP EXTERNAL TABLE IF EXISTS external.protobuf_freight.shipment_packages WITH FILES;
DROP EXTERNAL TABLE IF EXISTS external.protobuf_freight.shipments WITH FILES;
DROP SCHEMA IF EXISTS external.protobuf_freight;
## When to Use Use this demo when your protobuf payloads have deep nesting and mixed scalar types: monetary `int64` in minor units, `float` measurements, multiple `bool` flags at different levels, more than one enum, and more than one repeated field per parent. The freight/logistics scenario mirrors real-world proto messages emitted by shipping APIs and EDI-to-proto gateways. ## What You Will Learn 1. Flatten a 3-level nested message tree (`Shipment -> Package -> Dimensions`) into scalar columns using dotted `include_paths`. 2. Run the same underlying file through three different `CREATE EXTERNAL TABLE` views (shipments / packages / tracking) by varying `explode_paths` - the DeltaForge single-code-path pattern for "one row per X". 3. Decode two independent enums in the same schema (`ShipmentStatus`, `PackageClass`) with `decode_enums = true`. 4. Aggregate `int64` money-in-cents (`total_cost_cents`, `declared_value_cents`) and `float` dimensions without losing precision. 5. Filter on `bool` fields at different nesting depths (`shipments.is_express`, `shipments.packages.requires_signature`). 6. Explode two sibling repeated fields independently (`packages` versus `tracking`) from the same parent. 7. Verify multi-file provenance using `df_file_name` against three carrier files. ## Prerequisites - `data/carrier_alpha.pb`, `carrier_beta.pb`, `carrier_gamma.pb` and the `schema/freight.proto` + `schema/timestamp.proto` files on a reachable path. - A zone of type `EXTERNAL` (defaults to `external`). - Understanding of `ASSERT ROW_COUNT` / `ASSERT VALUE` and how `explode_paths` affects cardinality.