Read proto3 AddressBook binaries, flatten nested PhoneNumber messages, decode PhoneType enums, and handle sparse/repeated fields across 3 team files (13 contacts, 22 phones).
-- Zone and schema
CREATE ZONE IF NOT EXISTS external TYPE EXTERNAL;
CREATE SCHEMA IF NOT EXISTS external.protobuf_demos;
-- Flattened contacts (13 rows) - phones joined as CSV
CREATE EXTERNAL TABLE IF NOT EXISTS external.protobuf_demos.contacts
USING PROTOBUF
LOCATION '/data/protobuf/addressbook'
OPTIONS (
proto_schema = '/schema/addressbook.proto',
message_type = 'tutorial.AddressBook',
proto_flatten_config = '{
"row_path": "people",
"include_paths": [
"people.name", "people.id", "people.email",
"people.phones.number", "people.phones.type",
"people.last_updated"
],
"default_repeat_handling": "join_comma",
"column_mappings": {
"people.name": "contact_name",
"people.id": "contact_id",
"people.phones.number": "phone_numbers",
"people.phones.type": "phone_types",
"people.last_updated": "last_updated"
},
"decode_enums": true,
"timestamp_format": "iso8601",
"max_depth": 10
}',
file_metadata = '{"columns":["df_file_name","df_file_modified","df_dataset","df_row_number"]}'
);
-- Exploded phone rows (22 rows) - one row per PhoneNumber
CREATE EXTERNAL TABLE IF NOT EXISTS external.protobuf_demos.contact_phones
USING PROTOBUF
LOCATION '/data/protobuf/addressbook'
OPTIONS (
proto_schema = '/schema/addressbook.proto',
message_type = 'tutorial.AddressBook',
proto_flatten_config = '{
"row_path": "people",
"explode_paths": ["people.phones"],
"include_paths": [
"people.name", "people.id", "people.email",
"people.phones.number", "people.phones.type",
"people.last_updated"
],
"column_mappings": {
"people.phones.number": "phone_number",
"people.phones.type": "phone_type"
},
"decode_enums": true,
"timestamp_format": "iso8601",
"max_depth": 10
}'
);
-- Verify
ASSERT ROW_COUNT = 13 SELECT * FROM external.protobuf_demos.contacts;
ASSERT ROW_COUNT = 22 SELECT * FROM external.protobuf_demos.contact_phones;
ASSERT VALUE count = 11 WHERE phone_type = 'MOBILE'
SELECT phone_type, COUNT(*) AS count
FROM external.protobuf_demos.contact_phones
GROUP BY phone_type;
-- Cleanup
DROP EXTERNAL TABLE IF EXISTS external.protobuf_demos.contacts WITH FILES;
DROP EXTERNAL TABLE IF EXISTS external.protobuf_demos.contact_phones WITH FILES;
## When to Use Use this demo when you need to ingest Protocol Buffers (proto3) binary files containing nested messages, repeated fields, and well-known types. It uses the canonical Google `AddressBook` schema to cover the common proto-to-table patterns you hit with microservice event streams, mobile exports, or gRPC payload archives. ## What You Will Learn 1. Create a `CREATE EXTERNAL TABLE ... USING PROTOBUF` against a `.proto` schema and select a specific `message_type` (`tutorial.AddressBook`). 2. Set `row_path` to pivot a repeated top-level field (`people`) into the row axis. 3. Choose a repeat strategy: `default_repeat_handling = 'join_comma'` for a flat contacts view versus `explode_paths` for one row per `PhoneNumber`. 4. Decode enums to string labels via `decode_enums = true` (PhoneType 0/1/2 becomes MOBILE/HOME/WORK). 5. Convert `google.protobuf.Timestamp` into ISO 8601 strings with `timestamp_format = 'iso8601'`. 6. Rename deep proto paths into flat column names using `column_mappings`. 7. Attach file provenance columns (`df_file_name`, `df_file_modified`, `df_dataset`, `df_row_number`) via the `file_metadata` option. 8. Handle proto3 sparse semantics: an empty `repeated phones` list and a default-valued empty `email` string. ## Prerequisites - The three `.pb` files under `data/` and the `addressbook.proto` schema file. - A writable external zone (defaults to `external`). - Familiarity with `CREATE EXTERNAL TABLE`, `ASSERT`, and schema-qualified table names.