Read 80 e-commerce orders across two Avro files with mixed codecs, exercising date/timestamp-millis logical types, nullable unions, and integer-cent monetary arithmetic.
-- ==========================================================================
-- SECTION 1: Zone & Schema
-- ==========================================================================
CREATE ZONE IF NOT EXISTS {{zone_name}} TYPE EXTERNAL
COMMENT 'E-commerce order analytics with Avro logical types';
CREATE SCHEMA IF NOT EXISTS {{zone_name}}.ecommerce
COMMENT 'Online retail order data: Q1/Q2 2025';
-- ==========================================================================
-- SECTION 2: External tables (schema auto-detected from Avro headers)
-- ==========================================================================
-- Multi-file scan across mixed codecs (null + deflate).
CREATE EXTERNAL TABLE IF NOT EXISTS {{zone_name}}.ecommerce.all_orders
USING AVRO
LOCATION '{{data_path}}'
OPTIONS (
file_metadata = '{"columns":["df_file_name","df_row_number"]}'
);
-- Q1-only via glob file_filter.
CREATE EXTERNAL TABLE IF NOT EXISTS {{zone_name}}.ecommerce.q1_orders
USING AVRO
LOCATION '{{data_path}}'
OPTIONS (
file_filter = '*q1*',
file_metadata = '{"columns":["df_file_name"]}'
);
-- max_rows caps EACH file independently (2 files x 10 rows = 20 rows).
CREATE EXTERNAL TABLE IF NOT EXISTS {{zone_name}}.ecommerce.sample_orders
USING AVRO
LOCATION '{{data_path}}'
OPTIONS (
max_rows = '10'
);
GRANT ADMIN ON TABLE {{zone_name}}.ecommerce.all_orders TO USER {{current_user}};
GRANT ADMIN ON TABLE {{zone_name}}.ecommerce.q1_orders TO USER {{current_user}};
GRANT ADMIN ON TABLE {{zone_name}}.ecommerce.sample_orders TO USER {{current_user}};
-- ==========================================================================
-- SECTION 3: Representative queries with ASSERT guards
-- ==========================================================================
-- 3a. Full scan, logical types intact (80 rows).
ASSERT ROW_COUNT = 80
ASSERT VALUE unit_price_cents = 500 WHERE order_id = 'ORD-10000'
SELECT order_id, customer_name, order_date, order_timestamp_ms,
product_category, quantity, unit_price_cents,
discount_pct, notes, df_file_name
FROM {{zone_name}}.ecommerce.all_orders
ORDER BY order_id;
-- 3b. file_filter isolates Q1 (40 rows).
ASSERT ROW_COUNT = 40
SELECT order_id, customer_name, product_category, quantity, unit_price_cents
FROM {{zone_name}}.ecommerce.q1_orders
ORDER BY order_id;
-- 3c. max_rows: 2 files x 10 = 20.
ASSERT ROW_COUNT = 20
SELECT order_id, product_category, unit_price_cents
FROM {{zone_name}}.ecommerce.sample_orders
ORDER BY order_id;
-- 3d. Integer-cent revenue (exact arithmetic, no float drift).
ASSERT ROW_COUNT = 5
ASSERT VALUE total_revenue_cents = 399840 WHERE product_category = 'Sports'
SELECT product_category,
COUNT(*) AS order_count,
SUM(quantity) AS total_items,
SUM(quantity * unit_price_cents) AS total_revenue_cents
FROM {{zone_name}}.ecommerce.all_orders
GROUP BY product_category
ORDER BY total_revenue_cents;
-- 3e. Nullable unions round-trip to SQL NULL.
ASSERT VALUE orders_with_discount = 60
ASSERT VALUE orders_without_discount = 20
ASSERT VALUE orders_with_notes = 32
ASSERT VALUE orders_without_notes = 48
SELECT COUNT(*) AS total_orders,
COUNT(discount_pct) AS orders_with_discount,
SUM(CASE WHEN discount_pct IS NULL THEN 1 ELSE 0 END) AS orders_without_discount,
COUNT(notes) AS orders_with_notes,
SUM(CASE WHEN notes IS NULL THEN 1 ELSE 0 END) AS orders_without_notes
FROM {{zone_name}}.ecommerce.all_orders;
-- ==========================================================================
-- SECTION 4: Cleanup
-- ==========================================================================
DROP EXTERNAL TABLE IF EXISTS {{zone_name}}.ecommerce.all_orders WITH FILES;
DROP EXTERNAL TABLE IF EXISTS {{zone_name}}.ecommerce.q1_orders WITH FILES;
DROP EXTERNAL TABLE IF EXISTS {{zone_name}}.ecommerce.sample_orders WITH FILES;
DROP SCHEMA IF EXISTS {{zone_name}}.ecommerce;
## When to Use Reach for this demo when you need to validate DeltaForge's Avro reader against the logical-type surface used by most transactional systems: `date` (int32 days-since-epoch), `timestamp-millis` (int64 ms-since-epoch), and nullable unions of the form `["null", T]`. It is also the right starting point when you want to confirm that Avro's per-file codec negotiation (null vs deflate) is transparent at query time, both files are read by the same `CREATE EXTERNAL TABLE` with no codec hint. ## What You Will Learn 1. How Avro logical types (`date`, `timestamp-millis`) surface as Arrow/SQL `DATE` and `TIMESTAMP` without any manual cast. 2. How nullable unions (`["null", "double"]`, `["null", "string"]`) become SQL-nullable columns that interact correctly with `IS NULL`, `COUNT(col)`, and `COALESCE`. 3. How a single external table reads heterogeneously compressed files (null codec + deflate codec) without user intervention. 4. How `file_filter` isolates a single partition file (e.g. Q1) by glob, and how `max_rows` bounds each file independently, not the whole table. 5. How to do safe monetary arithmetic using integer cents to avoid IEEE-754 drift in Avro `double` fields. ## Prerequisites - DeltaForge with Avro reader enabled (default). - Read access to the demo `data/` directory containing `orders_q1.avro` and `orders_q2.avro`. - Permission to create a zone and schema, or an existing `{{zone_name}}` you own.