Capture complex nested subtrees as serialized JSON string columns with json_paths, and contrast against full flattening.
-- ============================================================================
-- Setup: two external tables over the same 5 property listings
-- ============================================================================
CREATE ZONE IF NOT EXISTS {{zone_name}} TYPE EXTERNAL
COMMENT 'External tables — demo datasets and file-backed data';
CREATE SCHEMA IF NOT EXISTS {{zone_name}}.json_demos
COMMENT 'JSON-backed external tables';
-- Captured: location and pricing kept as JSON strings
CREATE EXTERNAL TABLE IF NOT EXISTS {{zone_name}}.json_demos.listings_captured
USING JSON
LOCATION '{{data_path}}'
OPTIONS (
json_flatten_config = '{
"root_path": "$",
"include_paths": [
"$.id", "$.title", "$.type",
"$.bedrooms", "$.bathrooms", "$.sqft",
"$.year_built", "$.status",
"$.location", "$.pricing", "$.tags"
],
"json_paths": ["$.location", "$.pricing"],
"column_mappings": {
"$.id": "id",
"$.type": "type",
"$.year_built": "year_built",
"$.location": "location",
"$.pricing": "pricing"
},
"max_depth": 10,
"separator": "_",
"default_array_handling": "to_json",
"infer_types": true
}',
file_metadata = '{"columns":["df_file_name","df_row_number"]}'
);
GRANT ADMIN ON TABLE {{zone_name}}.json_demos.listings_captured TO USER {{current_user}};
-- Flattened: same subtrees expanded into individual columns
CREATE EXTERNAL TABLE IF NOT EXISTS {{zone_name}}.json_demos.listings_flattened
USING JSON
LOCATION '{{data_path}}'
OPTIONS (
json_flatten_config = '{
"root_path": "$",
"include_paths": [
"$.id", "$.title", "$.type",
"$.bedrooms", "$.bathrooms", "$.sqft",
"$.year_built", "$.status",
"$.location.address.street", "$.location.address.unit",
"$.location.address.city", "$.location.address.state",
"$.location.address.zip",
"$.location.geo.lat", "$.location.geo.lng",
"$.location.neighborhood", "$.location.walk_score",
"$.pricing.list_price", "$.pricing.price_per_sqft",
"$.pricing.hoa_monthly", "$.pricing.tax_annual",
"$.pricing.mortgage_estimate.monthly_payment",
"$.tags"
],
"column_mappings": {
"$.id": "id",
"$.type": "type",
"$.location.address.street": "location_address_street",
"$.location.address.city": "location_address_city",
"$.location.address.state": "location_address_state",
"$.location.address.zip": "location_address_zip",
"$.location.geo.lat": "location_geo_lat",
"$.location.geo.lng": "location_geo_lng",
"$.location.neighborhood": "location_neighborhood",
"$.location.walk_score": "location_walk_score",
"$.pricing.list_price": "pricing_list_price",
"$.pricing.price_per_sqft": "pricing_price_per_sqft",
"$.pricing.hoa_monthly": "pricing_hoa_monthly",
"$.pricing.tax_annual": "pricing_tax_annual",
"$.pricing.mortgage_estimate.monthly_payment": "pricing_mortgage_estimate_monthly_payment"
},
"max_depth": 10,
"separator": "_",
"default_array_handling": "to_json",
"infer_types": true
}',
file_metadata = '{"columns":["df_file_name","df_row_number"]}'
);
GRANT ADMIN ON TABLE {{zone_name}}.json_demos.listings_flattened TO USER {{current_user}};
-- ============================================================================
-- Queries: captured vs flattened
-- ============================================================================
-- Both tables see the same 5 listings
ASSERT ROW_COUNT = 5
SELECT * FROM {{zone_name}}.json_demos.listings_captured;
ASSERT ROW_COUNT = 5
SELECT * FROM {{zone_name}}.json_demos.listings_flattened;
-- Every captured row is a JSON object (starts with '{')
ASSERT VALUE json_location_count = 5
SELECT COUNT(*) FILTER (WHERE location LIKE '{%') AS json_location_count
FROM {{zone_name}}.json_demos.listings_captured;
ASSERT VALUE json_pricing_count = 5
SELECT COUNT(*) FILTER (WHERE pricing LIKE '{%') AS json_pricing_count
FROM {{zone_name}}.json_demos.listings_captured;
-- Flattened table lets you filter on nested fields directly
ASSERT ROW_COUNT = 5
ASSERT VALUE location_address_city = 'San Francisco' WHERE id = 'LST-001'
ASSERT VALUE location_neighborhood = 'Hawthorne' WHERE id = 'LST-002'
ASSERT VALUE pricing_list_price = 4750000 WHERE id = 'LST-005'
SELECT id, title, type, bedrooms,
location_address_city, location_address_state, location_neighborhood,
pricing_list_price, pricing_tax_annual,
pricing_mortgage_estimate_monthly_payment
FROM {{zone_name}}.json_demos.listings_flattened
ORDER BY id;
-- Side-by-side: JOIN captured and flattened on id
ASSERT ROW_COUNT = 1
ASSERT VALUE location_address_city = 'Boston'
ASSERT VALUE pricing_mortgage_estimate_monthly_payment = 27730
SELECT c.id, c.title, c.location,
f.location_address_city, f.location_address_state,
f.pricing_list_price, f.pricing_mortgage_estimate_monthly_payment
FROM {{zone_name}}.json_demos.listings_captured c
JOIN {{zone_name}}.json_demos.listings_flattened f ON c.id = f.id
WHERE c.id = 'LST-005';
-- ============================================================================
-- Cleanup
-- ============================================================================
DROP EXTERNAL TABLE IF EXISTS {{zone_name}}.json_demos.listings_captured WITH FILES;
DROP EXTERNAL TABLE IF EXISTS {{zone_name}}.json_demos.listings_flattened WITH FILES;
DROP SCHEMA IF EXISTS {{zone_name}}.json_demos;
DROP ZONE IF EXISTS {{zone_name}};
## When to Use Use this demo when a JSON document has subtrees you want to preserve verbatim — typically for downstream systems that re-serialize the JSON (map renderers, mortgage calculators, API responses) — but you still want the top-level scalar fields as proper columns. `json_paths` tells DeltaForge "stop flattening here, hand me the whole subtree as a JSON string." The demo builds two tables from the same 5 property listings: `listings_captured` (location + pricing kept as JSON blobs) and `listings_flattened` (same subtrees expanded into individual columns) so you can compare the two shapes side by side. ## What You Will Learn 1. How `json_paths: ["$.location", "$.pricing"]` turns two subtrees into two VARCHAR columns containing raw JSON. 2. How `json_paths` must be combined with `include_paths` — the path must appear in BOTH for the column to materialize. 3. How captured subtrees coexist with normally flattened top-level fields (`id`, `title`, `bedrooms`). 4. How `column_mappings` renames both flattened fields AND captured JSON columns for readability. 5. How the same source files produce a fully flattened shape (`location_address_city`, `pricing_mortgage_estimate_monthly_payment`) when `json_paths` is omitted. 6. How to JOIN the captured and flattened tables on a shared key to demonstrate equivalence. 7. How `LIKE '{%'` and `LIKE '%list_price%'` are pragmatic integrity checks on captured JSON. 8. When to prefer captured JSON (API pass-through, schema churn, unknown consumers) over full flattening (analytics, BI, direct SQL predicates). ## Prerequisites A DeltaForge catalog with create-zone / create-schema permission and the two listing files (`01_listings_residential.json`, `02_listings_commercial.json`, 5 listings total) in the `{{data_path}}` directory. Reviewing the music-catalog demo first gives useful context for `json_paths` vs. `explode_paths`.