Explode 3+ level order XML into per-line-item rows with CDATA descriptions, exclude internal audit blocks, and build a per-order summary with repeat counts.
-- === SETUP ===
CREATE ZONE IF NOT EXISTS {{zone_name}} TYPE EXTERNAL
COMMENT 'External tables, demo datasets and file-backed data';
CREATE SCHEMA IF NOT EXISTS {{zone_name}}.xml_demos
COMMENT 'XML-backed external tables';
-- Table 1: exploded, one row per line item (11 total)
CREATE EXTERNAL TABLE IF NOT EXISTS {{zone_name}}.xml_demos.order_lines
USING XML
LOCATION '{{data_path}}'
OPTIONS (
xml_flatten_config = '{
"row_xpath": "//order",
"explode_paths": ["/orders/order/items/item"],
"include_paths": [
"/orders/order/@id",
"/orders/order/@status",
"/orders/order/customer/name",
"/orders/order/customer/tier",
"/orders/order/order_date",
"/orders/order/items/item/@sku",
"/orders/order/items/item/product",
"/orders/order/items/item/description",
"/orders/order/items/item/quantity",
"/orders/order/items/item/unit_price",
"/orders/order/items/item/variant/size",
"/orders/order/items/item/variant/color",
"/orders/order/gift_wrap",
"/orders/order/express",
"/orders/order/shipping_total"
],
"exclude_paths": ["/orders/order/internal_audit"],
"column_mappings": {
"/orders/order/@id": "orders_order_attr_id",
"/orders/order/@status": "orders_order_attr_status",
"/orders/order/customer/name": "orders_order_customer_name",
"/orders/order/customer/tier": "orders_order_customer_tier",
"/orders/order/order_date": "orders_order_order_date",
"/orders/order/items/item/@sku": "orders_order_items_item_attr_sku",
"/orders/order/items/item/product": "orders_order_items_item_product",
"/orders/order/items/item/description": "orders_order_items_item_description",
"/orders/order/items/item/quantity": "orders_order_items_item_quantity",
"/orders/order/items/item/unit_price": "orders_order_items_item_unit_price",
"/orders/order/items/item/variant/size": "orders_order_items_item_variant_size",
"/orders/order/items/item/variant/color": "orders_order_items_item_variant_color",
"/orders/order/gift_wrap": "orders_order_gift_wrap",
"/orders/order/express": "orders_order_express",
"/orders/order/shipping_total": "orders_order_shipping_total"
},
"include_attributes": true,
"separator": "_",
"max_depth": 10,
"strip_namespace_prefixes": true
}',
file_metadata = '{"columns":["df_file_name","df_file_modified","df_dataset","df_row_number"]}'
);
GRANT ADMIN ON TABLE {{zone_name}}.xml_demos.order_lines TO USER {{current_user}};
-- Table 2: summarised, one row per order with item count
CREATE EXTERNAL TABLE IF NOT EXISTS {{zone_name}}.xml_demos.order_summary
USING XML
LOCATION '{{data_path}}'
OPTIONS (
xml_flatten_config = '{
"row_xpath": "//order",
"include_paths": [
"/orders/order/@id",
"/orders/order/@status",
"/orders/order/customer/name",
"/orders/order/customer/email",
"/orders/order/customer/tier",
"/orders/order/order_date",
"/orders/order/items/item",
"/orders/order/gift_wrap",
"/orders/order/express",
"/orders/order/shipping_total"
],
"exclude_paths": ["/orders/order/internal_audit"],
"default_repeat_handling": "count",
"column_mappings": {
"/orders/order/@id": "orders_order_attr_id",
"/orders/order/@status": "orders_order_attr_status",
"/orders/order/customer/name": "orders_order_customer_name",
"/orders/order/customer/email": "orders_order_customer_email",
"/orders/order/customer/tier": "orders_order_customer_tier",
"/orders/order/order_date": "orders_order_order_date",
"/orders/order/items/item": "orders_order_items_item",
"/orders/order/gift_wrap": "orders_order_gift_wrap",
"/orders/order/express": "orders_order_express",
"/orders/order/shipping_total": "orders_order_shipping_total"
},
"include_attributes": true,
"separator": "_",
"max_depth": 10,
"strip_namespace_prefixes": true
}',
file_metadata = '{"columns":["df_file_name","df_file_modified","df_dataset","df_row_number"]}'
);
GRANT ADMIN ON TABLE {{zone_name}}.xml_demos.order_summary TO USER {{current_user}};
-- === QUERIES ===
-- 1. Explode count: 7 items from file 1 + 4 items from file 2 = 11
ASSERT ROW_COUNT = 11
SELECT * FROM {{zone_name}}.xml_demos.order_lines;
-- 2. Deep nesting: every line carries its size/color variant
ASSERT VALUE deep_nesting_count = 11
SELECT COUNT(*) FILTER (WHERE orders_order_items_item_variant_color IS NOT NULL) AS deep_nesting_count
FROM {{zone_name}}.xml_demos.order_lines;
-- 3. CDATA: descriptions retain inline HTML
ASSERT VALUE cdata_count = 6
SELECT COUNT(*) FILTER (
WHERE orders_order_items_item_description LIKE '%<b>%'
OR orders_order_items_item_description LIKE '%<em>%'
) AS cdata_count
FROM {{zone_name}}.xml_demos.order_lines;
-- 4. exclude_paths: internal_audit must NOT produce any column
ASSERT VALUE audit_columns = 0
SELECT COUNT(*) AS audit_columns
FROM information_schema.columns
WHERE table_name = 'order_lines'
AND (column_name LIKE '%cost_center%' OR column_name LIKE '%margin_pct%');
-- 5. Summary: one row per order
ASSERT ROW_COUNT = 5
SELECT * FROM {{zone_name}}.xml_demos.order_summary;
-- 6. Revenue by order, float sums use tolerant bounds
ASSERT WARNING VALUE order_total BETWEEN 189.94 AND 189.96 WHERE orders_order_attr_id = 'ORD-1005'
ASSERT WARNING VALUE order_total BETWEEN 159.90 AND 159.92 WHERE orders_order_attr_id = 'ORD-1002'
SELECT orders_order_attr_id, orders_order_customer_name,
SUM(CAST(orders_order_items_item_quantity AS INT) *
CAST(orders_order_items_item_unit_price AS DOUBLE)) AS order_total,
MIN(orders_order_shipping_total) AS shipping
FROM {{zone_name}}.xml_demos.order_lines
GROUP BY orders_order_attr_id, orders_order_customer_name
ORDER BY order_total DESC;
-- === CLEANUP ===
DROP EXTERNAL TABLE IF EXISTS {{zone_name}}.xml_demos.order_lines WITH FILES;
DROP EXTERNAL TABLE IF EXISTS {{zone_name}}.xml_demos.order_summary WITH FILES;
DROP SCHEMA IF EXISTS {{zone_name}}.xml_demos;
DROP ZONE IF EXISTS {{zone_name}};
## When to Use Use this pattern whenever an XML payload has a genuine parent/child structure, an order with many line items, an invoice with many rows, a shipment with many packages, and analytics need one row per child while still preserving parent context. The same source can feed two tables: an exploded fact table for line-level analysis and a non-exploded summary that counts the children. ## What You Will Learn 1. How `explode_paths` turns one `<order>` into N rows (one per `<item>`), while duplicating parent scalars like `customer/name` onto every exploded row. 2. How to capture 3+ levels of nesting (`order/items/item/variant/color`) as flat columns via `include_paths` plus `column_mappings`. 3. How CDATA sections (`<![CDATA[<b>Premium</b>...]]>`) are unwrapped: the CDATA sentinel is stripped but the HTML inside is preserved as raw text. 4. How `exclude_paths` hides a whole subtree (`internal_audit`) from both tables without having to enumerate every child. 5. How `default_repeat_handling: "count"` turns repeating children into an integer count on the non-exploded summary table. 6. How self-closing XML flags (`<gift_wrap/>`, `<express/>`) surface as presence columns in the output. 7. How to write float-tolerant `ASSERT WARNING VALUE ... BETWEEN lo AND hi` checks for revenue sums that accumulate IEEE 754 rounding. 8. How the same `LOCATION` can back two external tables with different `xml_flatten_config` values, exploded vs. aggregated, without copying data. ## Prerequisites An external zone, order batch XML files with the `<orders><order>...</order></orders>` shape used by the demo (two files, 5 orders, 11 line items), and `ADMIN` on the target schema. Familiarity with the schema-evolution / basic-flattening demo helps but is not required.