Explode nested track arrays into per-track rows while also producing a per-album summary that keeps the vendor subtree as a JSON blob.
-- ============================================================================
-- Setup: one directory, two external tables — exploded tracks + album summary
-- ============================================================================
CREATE ZONE IF NOT EXISTS {{zone_name}} TYPE EXTERNAL
COMMENT 'External tables — demo datasets and file-backed data';
CREATE SCHEMA IF NOT EXISTS {{zone_name}}.json_demos
COMMENT 'JSON-backed external tables';
-- Table 1: album_tracks — exploded, one row per track (3,503 rows)
CREATE EXTERNAL TABLE IF NOT EXISTS {{zone_name}}.json_demos.album_tracks
USING JSON
LOCATION '{{data_path}}'
OPTIONS (
json_flatten_config = '{
"root_path": "$",
"explode_paths": ["$.details"],
"include_paths": [
"$.id", "$.name", "$.sku", "$.status", "$.price", "$.taxable",
"$.vendor.id", "$.vendor.name",
"$.details.track_id", "$.details.name", "$.details.composer",
"$.details.genre_id", "$.details.milliseconds", "$.details.bytes",
"$.details.unit_price"
],
"column_mappings": {
"$.id": "id",
"$.name": "name",
"$.vendor.id": "vendor_id",
"$.vendor.name": "vendor_name",
"$.details.track_id": "details_track_id",
"$.details.name": "details_name",
"$.details.genre_id": "details_genre_id",
"$.details.milliseconds": "details_milliseconds",
"$.details.bytes": "details_bytes",
"$.details.unit_price": "details_unit_price"
},
"max_depth": 10,
"separator": "_",
"infer_types": true
}',
file_metadata = '{"columns":["df_file_name","df_row_number"]}'
);
GRANT ADMIN ON TABLE {{zone_name}}.json_demos.album_tracks TO USER {{current_user}};
-- Table 2: album_summary — one row per album (347 rows), vendor as JSON blob
CREATE EXTERNAL TABLE IF NOT EXISTS {{zone_name}}.json_demos.album_summary
USING JSON
LOCATION '{{data_path}}'
OPTIONS (
json_flatten_config = '{
"root_path": "$",
"include_paths": [
"$.id", "$.name", "$.sku", "$.status", "$.price",
"$.taxable", "$.requires_shipping",
"$.vendor", "$.details"
],
"json_paths": ["$.vendor"],
"column_mappings": {
"$.id": "id",
"$.name": "name"
},
"default_array_handling": "count",
"max_depth": 10,
"separator": "_",
"infer_types": true
}',
file_metadata = '{"columns":["df_file_name","df_row_number"]}'
);
GRANT ADMIN ON TABLE {{zone_name}}.json_demos.album_summary TO USER {{current_user}};
-- ============================================================================
-- Queries
-- ============================================================================
-- Total exploded track rows
ASSERT ROW_COUNT = 3503
SELECT * FROM {{zone_name}}.json_demos.album_tracks;
-- Total album rows
ASSERT ROW_COUNT = 347
SELECT * FROM {{zone_name}}.json_demos.album_summary;
-- Nested vendor flattening
ASSERT ROW_COUNT = 10
ASSERT VALUE vendor_name = 'AC/DC' WHERE vendor_id = 1
SELECT DISTINCT vendor_id, vendor_name
FROM {{zone_name}}.json_demos.album_tracks
ORDER BY vendor_name
LIMIT 10;
-- json_paths: vendor kept as JSON string in album_summary
ASSERT ROW_COUNT = 1
SELECT name, vendor
FROM {{zone_name}}.json_demos.album_summary
WHERE id = 1;
-- Multi-file attribution via df_file_name
ASSERT ROW_COUNT = 3
ASSERT VALUE track_count = 370 WHERE df_file_name LIKE '%catalog_jazz%'
ASSERT VALUE track_count = 1037 WHERE df_file_name LIKE '%catalog_pop%'
ASSERT VALUE track_count = 2096 WHERE df_file_name LIKE '%catalog_rock%'
SELECT df_file_name, COUNT(*) AS track_count
FROM {{zone_name}}.json_demos.album_tracks
GROUP BY df_file_name
ORDER BY df_file_name;
-- Revenue by album (exercises unit_price inference)
ASSERT ROW_COUNT = 10
ASSERT VALUE total_revenue = 56.43 WHERE vendor_name = 'Lenny Kravitz'
SELECT name, vendor_name,
COUNT(*) AS tracks,
ROUND(SUM(CAST(details_unit_price AS DOUBLE)), 2) AS total_revenue
FROM {{zone_name}}.json_demos.album_tracks
GROUP BY name, vendor_name
ORDER BY total_revenue DESC
LIMIT 10;
-- ============================================================================
-- Cleanup
-- ============================================================================
DROP EXTERNAL TABLE IF EXISTS {{zone_name}}.json_demos.album_tracks WITH FILES;
DROP EXTERNAL TABLE IF EXISTS {{zone_name}}.json_demos.album_summary WITH FILES;
DROP SCHEMA IF EXISTS {{zone_name}}.json_demos;
DROP ZONE IF EXISTS {{zone_name}};
## When to Use Use this demo when your JSON payload has a repeating inner array (tracks in an album, line items in an order, events in a session) and you need BOTH an exploded grain (one row per child) and a rolled-up summary grain (one row per parent, with the child array counted or preserved). It shows the canonical "dual-view" pattern: two external tables over the SAME source files, each with its own `json_flatten_config`. ## What You Will Learn 1. How `explode_paths: ["$.details"]` fans out an array into one row per element, duplicating the parent columns. 2. How nested-object flattening lets `$.vendor.id` / `$.vendor.name` land as sibling columns without an explicit JOIN. 3. How `column_mappings` gives deep paths readable names (`$.details.track_id` → `details_track_id`). 4. How `json_paths: ["$.vendor"]` keeps a subtree as a raw JSON string column for downstream API use. 5. How `default_array_handling: "count"` replaces the `details` array with an integer track count in the summary table (vs. `to_json` which would keep it as a string). 6. How the same directory feeds two external tables with different flatten configs — no data duplication on disk. 7. How multi-file reading uses `df_file_name` from `file_metadata` to attribute rows to `catalog_rock.json`, `catalog_jazz.json`, or `catalog_pop.json`. 8. How to verify totals with `ASSERT ROW_COUNT` and per-file counts with `ASSERT VALUE ... WHERE df_file_name LIKE ...`. ## Prerequisites A DeltaForge catalog with permission to create a zone and schema, and the three catalog files (`catalog_rock.json`, `catalog_jazz.json`, `catalog_pop.json`) totalling 347 albums / 3,503 tracks in the `{{data_path}}` directory. Familiarity with the customers-basics demo is recommended but not required.