Protobuf Freight Shipping - Multi-Carrier Manifest

Read multi-carrier proto3 ShippingManifest binaries to exercise bool/int64/float scalars, 3-level nesting (Shipment -> Package -> Dimensions), two enums, and two parallel repeated fields (packages + tracking).

Category: protobuf

Syntax

CREATE ZONE IF NOT EXISTS external TYPE EXTERNAL;
CREATE SCHEMA IF NOT EXISTS external.protobuf_freight;

-- Table 1: shipments (12 rows) - packages and tracking joined as CSV
CREATE EXTERNAL TABLE IF NOT EXISTS external.protobuf_freight.shipments
USING PROTOBUF
LOCATION '/data/protobuf/freight'
OPTIONS (
    schema_path = '/data/protobuf/freight/schema/freight.proto',
    message_name = 'freight.ShippingManifest',
    proto_flatten_config = '{
        "row_path": "shipments",
        "include_paths": [
            "shipments.shipment_id", "shipments.origin", "shipments.destination",
            "shipments.status", "shipments.is_express", "shipments.is_insured",
            "shipments.total_cost_cents",
            "shipments.packages.package_id", "shipments.packages.description",
            "shipments.tracking.location", "shipments.created_at"
        ],
        "default_repeat_handling": "join_comma",
        "decode_enums": true,
        "timestamp_format": "iso8601",
        "max_depth": 10
    }',
    file_metadata = '{"columns":["df_file_name","df_file_modified","df_dataset","df_row_number"]}'
);

-- Table 2: shipment_packages (24 rows) - explode packages, pull 3-level Dimensions
CREATE EXTERNAL TABLE IF NOT EXISTS external.protobuf_freight.shipment_packages
USING PROTOBUF
LOCATION '/data/protobuf/freight'
OPTIONS (
    schema_path = '/data/protobuf/freight/schema/freight.proto',
    message_name = 'freight.ShippingManifest',
    proto_flatten_config = '{
        "row_path": "shipments",
        "explode_paths": ["shipments.packages"],
        "include_paths": [
            "shipments.shipment_id", "shipments.status", "shipments.is_express",
            "shipments.packages.package_id", "shipments.packages.description",
            "shipments.packages.weight_kg",
            "shipments.packages.dimensions.length_cm",
            "shipments.packages.dimensions.width_cm",
            "shipments.packages.dimensions.height_cm",
            "shipments.packages.package_class",
            "shipments.packages.requires_signature",
            "shipments.packages.declared_value_cents"
        ],
        "decode_enums": true,
        "timestamp_format": "iso8601",
        "max_depth": 10
    }'
);

-- Table 3: shipment_tracking (39 rows) - explode the second repeated field
CREATE EXTERNAL TABLE IF NOT EXISTS external.protobuf_freight.shipment_tracking
USING PROTOBUF
LOCATION '/data/protobuf/freight'
OPTIONS (
    schema_path = '/data/protobuf/freight/schema/freight.proto',
    message_name = 'freight.ShippingManifest',
    proto_flatten_config = '{
        "row_path": "shipments",
        "explode_paths": ["shipments.tracking"],
        "include_paths": [
            "shipments.shipment_id", "shipments.origin", "shipments.destination",
            "shipments.status",
            "shipments.tracking.event_time",
            "shipments.tracking.location",
            "shipments.tracking.description"
        ],
        "decode_enums": true,
        "timestamp_format": "iso8601"
    }'
);

-- Verify
ASSERT ROW_COUNT = 12 SELECT * FROM external.protobuf_freight.shipments;
ASSERT ROW_COUNT = 24 SELECT * FROM external.protobuf_freight.shipment_packages;
ASSERT ROW_COUNT = 39 SELECT * FROM external.protobuf_freight.shipment_tracking;
ASSERT VALUE total_value = 2150000
SELECT SUM(declared_value_cents) AS total_value
FROM external.protobuf_freight.shipment_packages
WHERE declared_value_cents > 100000;

-- Cleanup
DROP EXTERNAL TABLE IF EXISTS external.protobuf_freight.shipment_tracking WITH FILES;
DROP EXTERNAL TABLE IF EXISTS external.protobuf_freight.shipment_packages WITH FILES;
DROP EXTERNAL TABLE IF EXISTS external.protobuf_freight.shipments WITH FILES;
DROP SCHEMA IF EXISTS external.protobuf_freight;

Description

## When to Use Use this demo when your protobuf payloads have deep nesting and mixed scalar types: monetary `int64` in minor units, `float` measurements, multiple `bool` flags at different levels, more than one enum, and more than one repeated field per parent. The freight/logistics scenario mirrors real-world proto messages emitted by shipping APIs and EDI-to-proto gateways. ## What You Will Learn 1. Flatten a 3-level nested message tree (`Shipment -> Package -> Dimensions`) into scalar columns using dotted `include_paths`. 2. Run the same underlying file through three different `CREATE EXTERNAL TABLE` views (shipments / packages / tracking) by varying `explode_paths` - the DeltaForge single-code-path pattern for "one row per X". 3. Decode two independent enums in the same schema (`ShipmentStatus`, `PackageClass`) with `decode_enums = true`. 4. Aggregate `int64` money-in-cents (`total_cost_cents`, `declared_value_cents`) and `float` dimensions without losing precision. 5. Filter on `bool` fields at different nesting depths (`shipments.is_express`, `shipments.packages.requires_signature`). 6. Explode two sibling repeated fields independently (`packages` versus `tracking`) from the same parent. 7. Verify multi-file provenance using `df_file_name` against three carrier files. ## Prerequisites - `data/carrier_alpha.pb`, `carrier_beta.pb`, `carrier_gamma.pb` and the `schema/freight.proto` + `schema/timestamp.proto` files on a reachable path. - A zone of type `EXTERNAL` (defaults to `external`). - Understanding of `ASSERT ROW_COUNT` / `ASSERT VALUE` and how `explode_paths` affects cardinality.

Pitfalls

See Also

Open in interactive docs →   DeltaForge home →