Extract deeply nested (4+ level) JSON fields across 10 heterogeneous country files, with exclude_paths and schema-evolution NULL filling.
-- ============================================================================
-- Setup: two external tables over 10 CIA World Factbook country documents
-- ============================================================================
CREATE ZONE IF NOT EXISTS {{zone_name}} TYPE EXTERNAL
COMMENT 'External tables — demo datasets and file-backed data';
CREATE SCHEMA IF NOT EXISTS {{zone_name}}.json_demos
COMMENT 'JSON-backed external tables';
-- Table 1: countries — flattened overview
CREATE EXTERNAL TABLE IF NOT EXISTS {{zone_name}}.json_demos.countries
USING JSON
LOCATION '{{data_path}}'
OPTIONS (
json_flatten_config = '{
"root_path": "$",
"include_paths": [
"$.Government.Country name.conventional short form.text",
"$.Government.Capital.name.text",
"$.Government.Government type.text",
"$.Government.Independence.text",
"$.Geography.Location.text",
"$.Geography.Area.total .text",
"$.Geography.Climate.text",
"$.Geography.Terrain.text",
"$.People and Society.Population.total.text",
"$.People and Society.Languages.Languages.text",
"$.People and Society.Religions.text",
"$.Terrorism.Terrorist group(s).text",
"$.Space.Space agency/agencies.text",
"$.Space.Space program overview.text"
],
"exclude_paths": ["$.Introduction.Background"],
"column_mappings": {
"$.Government.Country name.conventional short form.text": "government_country_name_conventional_short_form_text",
"$.Government.Capital.name.text": "government_capital_name_text",
"$.Government.Government type.text": "government_government_type_text",
"$.Geography.Area.total .text": "geography_area_total_text",
"$.People and Society.Population.total.text": "people_and_society_population_total_text",
"$.Terrorism.Terrorist group(s).text": "terrorism_terrorist_group_s_text",
"$.Space.Space agency/agencies.text": "space_space_agency_agencies_text",
"$.Space.Space program overview.text": "space_space_program_overview_text"
},
"max_depth": 5,
"separator": "_",
"infer_types": false
}',
file_metadata = '{"columns":["df_file_name","df_row_number"]}'
);
GRANT ADMIN ON TABLE {{zone_name}}.json_demos.countries TO USER {{current_user}};
-- Table 2: country_economy — GDP and sector composition
CREATE EXTERNAL TABLE IF NOT EXISTS {{zone_name}}.json_demos.country_economy
USING JSON
LOCATION '{{data_path}}'
OPTIONS (
json_flatten_config = '{
"root_path": "$",
"include_paths": [
"$.Government.Country name.conventional short form.text",
"$.Economy.Economic overview.text",
"$.Economy.Real GDP (purchasing power parity).Real GDP (purchasing power parity) 2023.text",
"$.Economy.Real GDP growth rate.Real GDP growth rate 2023.text",
"$.Economy.Real GDP per capita.Real GDP per capita 2023.text",
"$.Economy.GDP (official exchange rate).text",
"$.Economy.Inflation rate (consumer prices).Inflation rate (consumer prices) 2023.text",
"$.Economy.GDP - composition, by sector of origin.agriculture.text",
"$.Economy.GDP - composition, by sector of origin.industry.text",
"$.Economy.GDP - composition, by sector of origin.services.text",
"$.Economy.Agricultural products.text",
"$.Economy.Industries.text",
"$.Economy.Unemployment rate.Unemployment rate 2023.text",
"$.Economy.Exports.Exports 2023.text",
"$.Economy.Imports.Imports 2023.text"
],
"exclude_paths": ["$.Introduction.Background"],
"column_mappings": {
"$.Government.Country name.conventional short form.text": "government_country_name_conventional_short_form_text",
"$.Economy.Real GDP (purchasing power parity).Real GDP (purchasing power parity) 2023.text": "economy_real_gdp_purchasing_power_parity_real_gdp_purchasing_power_parity_2023_text",
"$.Economy.GDP - composition, by sector of origin.agriculture.text": "economy_gdp_composition_by_sector_of_origin_agriculture_text",
"$.Economy.GDP - composition, by sector of origin.industry.text": "economy_gdp_composition_by_sector_of_origin_industry_text",
"$.Economy.GDP - composition, by sector of origin.services.text": "economy_gdp_composition_by_sector_of_origin_services_text"
},
"max_depth": 5,
"separator": "_",
"infer_types": false
}',
file_metadata = '{"columns":["df_file_name","df_row_number"]}'
);
GRANT ADMIN ON TABLE {{zone_name}}.json_demos.country_economy TO USER {{current_user}};
-- ============================================================================
-- Queries
-- ============================================================================
-- 10 files = 10 rows
ASSERT ROW_COUNT = 10
SELECT * FROM {{zone_name}}.json_demos.countries;
-- Deep-nested field: 4 levels, keys contain spaces
ASSERT VALUE egypt_count = 1
SELECT COUNT(*) AS egypt_count
FROM {{zone_name}}.json_demos.countries
WHERE government_country_name_conventional_short_form_text = 'Egypt';
-- Schema evolution: Terrorism present for 8/10 countries
ASSERT ROW_COUNT = 10
ASSERT VALUE terrorism_status = 'NO DATA' WHERE government_country_name_conventional_short_form_text = 'Ghana'
ASSERT VALUE terrorism_status = 'NO DATA' WHERE government_country_name_conventional_short_form_text = 'Rwanda'
ASSERT VALUE terrorism_status = 'HAS DATA' WHERE government_country_name_conventional_short_form_text = 'Egypt'
SELECT government_country_name_conventional_short_form_text,
CASE WHEN terrorism_terrorist_group_s_text IS NULL
THEN 'NO DATA' ELSE 'HAS DATA' END AS terrorism_status
FROM {{zone_name}}.json_demos.countries
ORDER BY government_country_name_conventional_short_form_text;
-- Schema evolution: Space present for 7/10 countries
ASSERT ROW_COUNT = 7
SELECT government_country_name_conventional_short_form_text,
space_space_agency_agencies_text
FROM {{zone_name}}.json_demos.countries
WHERE space_space_agency_agencies_text IS NOT NULL
ORDER BY government_country_name_conventional_short_form_text;
-- Economy deep path (4 levels, parentheses in keys)
ASSERT ROW_COUNT = 1
ASSERT VALUE economy_real_gdp_purchasing_power_parity_real_gdp_purchasing_power_parity_2023_text IS NOT NULL
SELECT government_country_name_conventional_short_form_text,
economy_real_gdp_purchasing_power_parity_real_gdp_purchasing_power_parity_2023_text
FROM {{zone_name}}.json_demos.country_economy
WHERE government_country_name_conventional_short_form_text = 'Egypt';
-- file_metadata attributes each row to a country file
ASSERT ROW_COUNT = 10
ASSERT VALUE df_file_name LIKE '%eg.json%' WHERE government_country_name_conventional_short_form_text = 'Egypt'
ASSERT VALUE df_file_name LIKE '%gh.json%' WHERE government_country_name_conventional_short_form_text = 'Ghana'
SELECT df_file_name, government_country_name_conventional_short_form_text
FROM {{zone_name}}.json_demos.countries
ORDER BY df_file_name;
-- ============================================================================
-- Cleanup
-- ============================================================================
DROP EXTERNAL TABLE IF EXISTS {{zone_name}}.json_demos.countries WITH FILES;
DROP EXTERNAL TABLE IF EXISTS {{zone_name}}.json_demos.country_economy WITH FILES;
DROP SCHEMA IF EXISTS {{zone_name}}.json_demos;
DROP ZONE IF EXISTS {{zone_name}};
## When to Use Use this demo when you are ingesting a directory of JSON documents that share a rough schema but differ in which sections they contain — a very common shape for open-data dumps, regulatory filings, and content feeds. It demonstrates how to extract 4-level-deep paths (keys with spaces and parentheses), exclude verbose subtrees, rename deep paths into analytics-ready columns, and rely on DeltaForge's union-schema behavior to NULL-fill fields that are absent from some files. Two external tables are built from the same 10 CIA World Factbook country profiles: `countries` (broad overview) and `country_economy` (GDP / sector composition). ## What You Will Learn 1. How `include_paths` selects specific fields from 13 top-level sections without materializing the whole document. 2. How to encode JSON keys that contain spaces, parentheses, hyphens, and commas in JSONPath (`$.Economy.GDP - composition, by sector of origin.agriculture.text`). 3. How `exclude_paths` skips expensive subtrees (`$.Introduction.Background` HTML text) even though they would otherwise match a broad include. 4. How `column_mappings` turns a 90-character deep path into a single friendly column name. 5. How `max_depth: 5` bounds flattening on documents that are 3–5 levels deep. 6. How schema evolution works automatically: files missing a section (Terrorism in 2/10, Space in 3/10) yield NULL for those columns, and queries can distinguish "HAS DATA" vs "NO DATA" with a `CASE`. 7. How `file_metadata.df_file_name` exposes the per-file country code (`eg.json`, `gh.json`), which is invaluable for debugging. 8. How `infer_types: false` is the right choice when every value is wrapped (`{"text": "..."}`) and must stay as VARCHAR. 9. How to compose a multi-check `UNION ALL` verification query that collapses every invariant to a single PASS/FAIL grid. ## Prerequisites A DeltaForge catalog with create-zone / create-schema rights and the 10 country JSON files (`cg.json`, `dj.json`, `eg.json`, `et.json`, `gh.json`, `ke.json`, `mo.json`, `ni.json`, `rw.json`, `sf.json`) in `{{data_path}}`. Comfort with JSONPath and the simpler `customers-basics` and `music-catalog` demos is strongly recommended before tackling this one.