Enable Change Data Feed to emit per-row insert / update_preimage / update_postimage / delete records so downstream systems can consume only the deltas between two versions.
-- SETUP
CREATE ZONE IF NOT EXISTS demo_zone TYPE EXTERNAL
COMMENT 'External and Delta tables -- demo datasets';
CREATE SCHEMA IF NOT EXISTS demo_zone.delta_demos
COMMENT 'Delta table management tutorial demos';
CREATE DELTA TABLE IF NOT EXISTS demo_zone.delta_demos.customer_accounts (
id INT,
name VARCHAR,
email VARCHAR,
tier VARCHAR,
balance DOUBLE,
status VARCHAR,
created_date VARCHAR
) LOCATION '~/delta-data/customer_accounts'
TBLPROPERTIES ('delta.enableChangeDataFeed' = 'true');
-- V0: seed 40 customers (abbreviated for brevity -- full demo seeds all 40)
INSERT INTO demo_zone.delta_demos.customer_accounts VALUES
(1, 'Alice Morgan', 'alice.morgan@mail.com', 'silver', 5200.00, 'active', '2023-01-05'),
(2, 'Bob Fischer', 'bob.fischer@mail.com', 'silver', 3100.00, 'active', '2023-01-10'),
(3, 'Carol Reeves', 'carol.reeves@mail.com', 'bronze', 1800.00, 'active', '2023-01-15'),
(4, 'Daniel Ortiz', 'daniel.ortiz@mail.com', 'silver', 4500.00, 'active', '2023-02-01'),
(5, 'Emily Watson', 'emily.watson@mail.com', 'bronze', 950.00, 'active', '2023-02-10'),
(6, 'Frank Dubois', 'frank.dubois@mail.com', 'silver', 6700.00, 'active', '2023-02-15'),
(7, 'Grace Nakamura', 'grace.nakamura@mail.com', 'bronze', 2200.00, 'active', '2023-03-01'),
(8, 'Henry Kowalski', 'henry.kowalski@mail.com', 'silver', 8100.00, 'active', '2023-03-10'),
(9, 'Irene Svensson', 'irene.svensson@mail.com', 'bronze', 1500.00, 'active', '2023-03-15'),
(10, 'James Okafor', 'james.okafor@mail.com', 'silver', 3900.00, 'active', '2023-04-01'),
(11, 'Karen Petrova', 'karen.petrova@mail.com', 'bronze', 2800.00, 'active', '2023-04-10'),
(12, 'Leo Andersen', 'leo.andersen@mail.com', 'silver', 7200.00, 'active', '2023-04-15'),
(13, 'Maria Gutierrez', 'maria.gutierrez@mail.com', 'bronze', 1200.00, 'active', '2023-05-01'),
(14, 'Nathan Brooks', 'nathan.brooks@mail.com', 'silver', 5800.00, 'active', '2023-05-10'),
(15, 'Olivia Henriksen', 'olivia.henriksen@mail.com', 'silver', 4100.00, 'active', '2023-05-15'),
(16, 'Patrick Lemoine', 'patrick.lemoine@mail.com', 'bronze', 900.00, 'active', '2023-06-01'),
(17, 'Quinn Tanaka', 'quinn.tanaka@mail.com', 'silver', 6300.00, 'active', '2023-06-10'),
(18, 'Rachel Kim', 'rachel.kim@mail.com', 'bronze', 2100.00, 'active', '2023-06-15'),
(19, 'Samuel Rivera', 'samuel.rivera@mail.com', 'silver', 4800.00, 'active', '2023-07-01'),
(20, 'Tara McBride', 'tara.mcbride@mail.com', 'bronze', 1600.00, 'active', '2023-07-10'),
(21, 'Umar Hassan', 'umar.hassan@mail.com', 'silver', 5500.00, 'active', '2023-07-15'),
(22, 'Vera Johansson', 'vera.johansson@mail.com', 'bronze', 3300.00, 'active', '2023-08-01'),
(23, 'William Cheng', 'william.cheng@mail.com', 'silver', 7800.00, 'active', '2023-08-10'),
(24, 'Xia Huang', 'xia.huang@mail.com', 'bronze', 1400.00, 'active', '2023-08-15'),
(25, 'Yusuf Demir', 'yusuf.demir@mail.com', 'silver', 4200.00, 'active', '2023-09-01'),
(26, 'Zara Patel', 'zara.patel@mail.com', 'bronze', 2600.00, 'active', '2023-09-10'),
(27, 'Adam Clarke', 'adam.clarke@mail.com', 'silver', 5900.00, 'active', '2023-09-15'),
(28, 'Beatrice Novak', 'beatrice.novak@mail.com', 'bronze', 1100.00, 'active', '2023-10-01'),
(29, 'Carlos Mendez', 'carlos.mendez@mail.com', 'silver', 6500.00, 'active', '2023-10-10'),
(30, 'Diana Frost', 'diana.frost@mail.com', 'bronze', 1900.00, 'active', '2023-10-15'),
(31, 'Erik Lindgren', 'erik.lindgren@mail.com', 'silver', 8500.00, 'active', '2023-11-01'),
(32, 'Fatima Al-Rashid', 'fatima.alrashid@mail.com', 'bronze', 2400.00, 'active', '2023-11-10'),
(33, 'Gabriel Costa', 'gabriel.costa@mail.com', 'silver', 7100.00, 'active', '2023-11-15'),
(34, 'Helen Park', 'helen.park@mail.com', 'bronze', 1700.00, 'active', '2023-12-01'),
(35, 'Ivan Volkov', 'ivan.volkov@mail.com', 'silver', 4600.00, 'active', '2023-12-10'),
(36, 'Julia Santos', 'julia.santos@mail.com', 'bronze', 3000.00, 'active', '2023-12-15'),
(37, 'Kevin O''Brien', 'kevin.obrien@mail.com', 'silver', 5100.00, 'active', '2024-01-05'),
(38, 'Linda Nguyen', 'linda.nguyen@mail.com', 'bronze', 2700.00, 'active', '2024-01-10'),
(39, 'Marco Bianchi', 'marco.bianchi@mail.com', 'silver', 6800.00, 'active', '2024-01-15'),
(40, 'Nadia Kozlov', 'nadia.kozlov@mail.com', 'bronze', 1300.00, 'active', '2024-02-01');
-- QUERIES
-- Baseline tier distribution
SELECT tier,
COUNT(*) AS customer_count,
ROUND(AVG(balance), 2) AS avg_balance
FROM demo_zone.delta_demos.customer_accounts
GROUP BY tier
ORDER BY tier;
-- V1: UPDATE -- promote top 10 by balance to gold
-- CDF will emit 10 update_preimage + 10 update_postimage rows at _commit_version = 1
UPDATE demo_zone.delta_demos.customer_accounts
SET tier = 'gold'
WHERE id IN (31, 8, 23, 12, 33, 39, 6, 29, 17, 27);
-- V2: INSERT -- 8 new customers
-- CDF will emit 8 insert rows at _commit_version = 2
INSERT INTO demo_zone.delta_demos.customer_accounts
SELECT * FROM (VALUES
(41, 'Oscar Fernandez', 'oscar.fernandez@mail.com', 'bronze', 2500.00, 'active', '2024-03-01'),
(42, 'Priya Sharma', 'priya.sharma@mail.com', 'silver', 4800.00, 'active', '2024-03-05'),
(43, 'Remy Laurent', 'remy.laurent@mail.com', 'bronze', 1600.00, 'active', '2024-03-10'),
(44, 'Sofia Rossi', 'sofia.rossi@mail.com', 'silver', 5300.00, 'active', '2024-03-15'),
(45, 'Tariq Mansour', 'tariq.mansour@mail.com', 'bronze', 900.00, 'active', '2024-04-01'),
(46, 'Uma Reddy', 'uma.reddy@mail.com', 'silver', 3700.00, 'active', '2024-04-05'),
(47, 'Viktor Novak', 'viktor.novak@mail.com', 'bronze', 2100.00, 'active', '2024-04-10'),
(48, 'Wendy Chang', 'wendy.chang@mail.com', 'silver', 4400.00, 'active', '2024-04-15')
) AS t(id, name, email, tier, balance, status, created_date);
-- V3a: UPDATE -- mark 3 accounts closed
-- CDF: 3 update_preimage + 3 update_postimage at _commit_version = 3
UPDATE demo_zone.delta_demos.customer_accounts
SET status = 'closed'
WHERE id IN (16, 28, 45);
-- V3b: DELETE -- remove the closed accounts
-- CDF: 3 delete rows at _commit_version = 4
DELETE FROM demo_zone.delta_demos.customer_accounts
WHERE status = 'closed';
-- V4: UPDATE -- 20% balance bump for 5 gold customers
-- CDF: 5 update_preimage + 5 update_postimage at _commit_version = 5
UPDATE demo_zone.delta_demos.customer_accounts
SET balance = ROUND(balance * 1.20, 2)
WHERE id IN (31, 8, 23, 12, 33);
-- Final snapshot: 45 rows (40 + 8 - 3)
SELECT COUNT(*) AS total_count FROM demo_zone.delta_demos.customer_accounts;
SELECT tier, COUNT(*) AS customer_count
FROM demo_zone.delta_demos.customer_accounts
GROUP BY tier
ORDER BY tier;
-- Verify the balance bump landed correctly
SELECT balance FROM demo_zone.delta_demos.customer_accounts WHERE id = 31;
-- CLEANUP
DROP DELTA TABLE IF EXISTS demo_zone.delta_demos.customer_accounts WITH FILES;
DROP SCHEMA IF EXISTS demo_zone.delta_demos;
DROP ZONE IF EXISTS demo_zone;
## When to Use When a downstream system -- a data warehouse mirror, a search index, an audit store, a streaming consumer -- needs to stay in sync with a Delta table without re-reading the full snapshot every time. CDF records the exact row-level deltas between any two versions as a read-only side channel. Without CDF you have two options: re-read everything (expensive) or diff snapshots yourself (slow and fragile). With CDF the engine exposes a table_changes source with four _change_type values -- insert, update_preimage, update_postimage, delete -- plus _commit_version and _commit_timestamp metadata columns. You turn it on at create time (or via ALTER TABLE SET TBLPROPERTIES) and downstream consumers query only the versions they have not yet processed. The scenario: a customer_accounts table with CDF enabled. V0 seeds 40 customers. V1 promotes 10 to gold tier (UPDATE). V2 adds 8 new customers (INSERT). V3 closes 3 accounts via UPDATE (status=closed) then DELETE. V4 grants a 20% balance bump to 5 premium customers. Every step produces a precise row-level change record. ## What You Will Learn 1. How to enable CDF at creation time via TBLPROPERTIES ('delta.enableChangeDataFeed' = 'true') 2. The four _change_type values CDF emits -- insert, update_preimage, update_postimage, delete -- and what each represents 3. Why UPDATE always produces two rows in the change feed (before + after images) -- this is what makes CDF useful for audit and diff computation 4. How a two-step soft-delete pattern (UPDATE status='closed', then DELETE) shows up as 4 logical events per row in the feed 5. How to drive incremental ETL by filtering on _commit_version between the last-processed checkpoint and the latest 6. That CDF increases write cost (each commit also writes _change_data files) but decreases downstream read cost dramatically 7. How VACUUM interacts with CDF -- CDF files are tied to commit retention, and the DeltaForge VACUUM CDC_RETAIN clause lets you keep them longer than data files if needed ## Prerequisites None. The setup seeds 40 customer accounts with CDF enabled. The queries drive V1..V4 to generate all four change types.
| Name | Type | Description |
|---|---|---|
TABLE_PATH | Filesystem or object-store location for the Delta table (with CDF enabled). |