XML E-Commerce: Order Line Explosion

Explode 3+ level order XML into per-line-item rows with CDATA descriptions, exclude internal audit blocks, and build a per-order summary with repeat counts.

Category: xml

Syntax

-- === SETUP ===
CREATE ZONE IF NOT EXISTS {{zone_name}} TYPE EXTERNAL
    COMMENT 'External tables, demo datasets and file-backed data';
CREATE SCHEMA IF NOT EXISTS {{zone_name}}.xml_demos
    COMMENT 'XML-backed external tables';

-- Table 1: exploded, one row per line item (11 total)
CREATE EXTERNAL TABLE IF NOT EXISTS {{zone_name}}.xml_demos.order_lines
USING XML
LOCATION '{{data_path}}'
OPTIONS (
    xml_flatten_config = '{
        "row_xpath": "//order",
        "explode_paths": ["/orders/order/items/item"],
        "include_paths": [
            "/orders/order/@id",
            "/orders/order/@status",
            "/orders/order/customer/name",
            "/orders/order/customer/tier",
            "/orders/order/order_date",
            "/orders/order/items/item/@sku",
            "/orders/order/items/item/product",
            "/orders/order/items/item/description",
            "/orders/order/items/item/quantity",
            "/orders/order/items/item/unit_price",
            "/orders/order/items/item/variant/size",
            "/orders/order/items/item/variant/color",
            "/orders/order/gift_wrap",
            "/orders/order/express",
            "/orders/order/shipping_total"
        ],
        "exclude_paths": ["/orders/order/internal_audit"],
        "column_mappings": {
            "/orders/order/@id":                             "orders_order_attr_id",
            "/orders/order/@status":                         "orders_order_attr_status",
            "/orders/order/customer/name":                   "orders_order_customer_name",
            "/orders/order/customer/tier":                   "orders_order_customer_tier",
            "/orders/order/order_date":                      "orders_order_order_date",
            "/orders/order/items/item/@sku":                 "orders_order_items_item_attr_sku",
            "/orders/order/items/item/product":              "orders_order_items_item_product",
            "/orders/order/items/item/description":          "orders_order_items_item_description",
            "/orders/order/items/item/quantity":             "orders_order_items_item_quantity",
            "/orders/order/items/item/unit_price":           "orders_order_items_item_unit_price",
            "/orders/order/items/item/variant/size":         "orders_order_items_item_variant_size",
            "/orders/order/items/item/variant/color":        "orders_order_items_item_variant_color",
            "/orders/order/gift_wrap":                       "orders_order_gift_wrap",
            "/orders/order/express":                         "orders_order_express",
            "/orders/order/shipping_total":                  "orders_order_shipping_total"
        },
        "include_attributes": true,
        "separator": "_",
        "max_depth": 10,
        "strip_namespace_prefixes": true
    }',
    file_metadata = '{"columns":["df_file_name","df_file_modified","df_dataset","df_row_number"]}'
);
GRANT ADMIN ON TABLE {{zone_name}}.xml_demos.order_lines TO USER {{current_user}};

-- Table 2: summarised, one row per order with item count
CREATE EXTERNAL TABLE IF NOT EXISTS {{zone_name}}.xml_demos.order_summary
USING XML
LOCATION '{{data_path}}'
OPTIONS (
    xml_flatten_config = '{
        "row_xpath": "//order",
        "include_paths": [
            "/orders/order/@id",
            "/orders/order/@status",
            "/orders/order/customer/name",
            "/orders/order/customer/email",
            "/orders/order/customer/tier",
            "/orders/order/order_date",
            "/orders/order/items/item",
            "/orders/order/gift_wrap",
            "/orders/order/express",
            "/orders/order/shipping_total"
        ],
        "exclude_paths": ["/orders/order/internal_audit"],
        "default_repeat_handling": "count",
        "column_mappings": {
            "/orders/order/@id":             "orders_order_attr_id",
            "/orders/order/@status":         "orders_order_attr_status",
            "/orders/order/customer/name":   "orders_order_customer_name",
            "/orders/order/customer/email":  "orders_order_customer_email",
            "/orders/order/customer/tier":   "orders_order_customer_tier",
            "/orders/order/order_date":      "orders_order_order_date",
            "/orders/order/items/item":      "orders_order_items_item",
            "/orders/order/gift_wrap":       "orders_order_gift_wrap",
            "/orders/order/express":         "orders_order_express",
            "/orders/order/shipping_total":  "orders_order_shipping_total"
        },
        "include_attributes": true,
        "separator": "_",
        "max_depth": 10,
        "strip_namespace_prefixes": true
    }',
    file_metadata = '{"columns":["df_file_name","df_file_modified","df_dataset","df_row_number"]}'
);
GRANT ADMIN ON TABLE {{zone_name}}.xml_demos.order_summary TO USER {{current_user}};

-- === QUERIES ===
-- 1. Explode count: 7 items from file 1 + 4 items from file 2 = 11
ASSERT ROW_COUNT = 11
SELECT * FROM {{zone_name}}.xml_demos.order_lines;

-- 2. Deep nesting: every line carries its size/color variant
ASSERT VALUE deep_nesting_count = 11
SELECT COUNT(*) FILTER (WHERE orders_order_items_item_variant_color IS NOT NULL) AS deep_nesting_count
FROM {{zone_name}}.xml_demos.order_lines;

-- 3. CDATA: descriptions retain inline HTML
ASSERT VALUE cdata_count = 6
SELECT COUNT(*) FILTER (
    WHERE orders_order_items_item_description LIKE '%<b>%'
       OR orders_order_items_item_description LIKE '%<em>%'
) AS cdata_count
FROM {{zone_name}}.xml_demos.order_lines;

-- 4. exclude_paths: internal_audit must NOT produce any column
ASSERT VALUE audit_columns = 0
SELECT COUNT(*) AS audit_columns
FROM information_schema.columns
WHERE table_name = 'order_lines'
  AND (column_name LIKE '%cost_center%' OR column_name LIKE '%margin_pct%');

-- 5. Summary: one row per order
ASSERT ROW_COUNT = 5
SELECT * FROM {{zone_name}}.xml_demos.order_summary;

-- 6. Revenue by order, float sums use tolerant bounds
ASSERT WARNING VALUE order_total BETWEEN 189.94 AND 189.96 WHERE orders_order_attr_id = 'ORD-1005'
ASSERT WARNING VALUE order_total BETWEEN 159.90 AND 159.92 WHERE orders_order_attr_id = 'ORD-1002'
SELECT orders_order_attr_id, orders_order_customer_name,
       SUM(CAST(orders_order_items_item_quantity AS INT) *
           CAST(orders_order_items_item_unit_price AS DOUBLE)) AS order_total,
       MIN(orders_order_shipping_total) AS shipping
FROM {{zone_name}}.xml_demos.order_lines
GROUP BY orders_order_attr_id, orders_order_customer_name
ORDER BY order_total DESC;

-- === CLEANUP ===
DROP EXTERNAL TABLE IF EXISTS {{zone_name}}.xml_demos.order_lines   WITH FILES;
DROP EXTERNAL TABLE IF EXISTS {{zone_name}}.xml_demos.order_summary WITH FILES;
DROP SCHEMA IF EXISTS {{zone_name}}.xml_demos;
DROP ZONE IF EXISTS {{zone_name}};

Description

## When to Use Use this pattern whenever an XML payload has a genuine parent/child structure, an order with many line items, an invoice with many rows, a shipment with many packages, and analytics need one row per child while still preserving parent context. The same source can feed two tables: an exploded fact table for line-level analysis and a non-exploded summary that counts the children. ## What You Will Learn 1. How `explode_paths` turns one `<order>` into N rows (one per `<item>`), while duplicating parent scalars like `customer/name` onto every exploded row. 2. How to capture 3+ levels of nesting (`order/items/item/variant/color`) as flat columns via `include_paths` plus `column_mappings`. 3. How CDATA sections (`<![CDATA[<b>Premium</b>...]]>`) are unwrapped: the CDATA sentinel is stripped but the HTML inside is preserved as raw text. 4. How `exclude_paths` hides a whole subtree (`internal_audit`) from both tables without having to enumerate every child. 5. How `default_repeat_handling: "count"` turns repeating children into an integer count on the non-exploded summary table. 6. How self-closing XML flags (`<gift_wrap/>`, `<express/>`) surface as presence columns in the output. 7. How to write float-tolerant `ASSERT WARNING VALUE ... BETWEEN lo AND hi` checks for revenue sums that accumulate IEEE 754 rounding. 8. How the same `LOCATION` can back two external tables with different `xml_flatten_config` values, exploded vs. aggregated, without copying data. ## Prerequisites An external zone, order batch XML files with the `<orders><order>...</order></orders>` shape used by the demo (two files, 5 orders, 11 line items), and `ADMIN` on the target schema. Familiarity with the schema-evolution / basic-flattening demo helps but is not required.

Pitfalls

Open in interactive docs →   DeltaForge home →