Preserve the full audit trail of dimension changes by expiring current rows with MERGE and inserting new versions in a single two-pass pattern.
-- SETUP
CREATE ZONE IF NOT EXISTS demo_zone TYPE EXTERNAL
COMMENT 'External and Delta tables -- demo datasets';
CREATE SCHEMA IF NOT EXISTS demo_zone.delta_demos
COMMENT 'Delta table management tutorial demos';
CREATE DELTA TABLE IF NOT EXISTS demo_zone.delta_demos.policy_dim (
surrogate_key INT,
policy_id VARCHAR,
holder_name VARCHAR,
coverage_type VARCHAR,
annual_premium DOUBLE,
region VARCHAR,
risk_score INT,
valid_from VARCHAR,
valid_to VARCHAR,
is_current INT
) LOCATION '~/delta-data/policy_dim';
INSERT INTO demo_zone.delta_demos.policy_dim VALUES
(1, 'POL-1001', 'Alice Johnson', 'standard', 2400.00, 'northeast', 45, '2024-01-01', '9999-12-31', 1),
(2, 'POL-1002', 'Bob Martinez', 'basic', 1200.00, 'southeast', 30, '2024-01-01', '9999-12-31', 1),
(3, 'POL-1003', 'Carol Chen', 'premium', 4800.00, 'west', 65, '2024-01-01', '9999-12-31', 1),
(4, 'POL-1004', 'David Kim', 'standard', 2600.00, 'midwest', 50, '2024-01-01', '9999-12-31', 1),
(5, 'POL-1005', 'Elena Rodriguez', 'basic', 1100.00, 'pacific', 25, '2024-01-01', '9999-12-31', 1),
(6, 'POL-1006', 'Frank O''Brien', 'platinum', 7200.00, 'northeast', 80, '2024-01-01', '9999-12-31', 1),
(7, 'POL-1007', 'Grace Patel', 'standard', 2200.00, 'southeast', 40, '2024-01-01', '9999-12-31', 1),
(8, 'POL-1008', 'Henry Nakamura', 'premium', 5100.00, 'west', 70, '2024-01-01', '9999-12-31', 1),
(9, 'POL-1009', 'Irene Fischer', 'basic', 1300.00, 'midwest', 28, '2024-01-01', '9999-12-31', 1),
(10, 'POL-1010', 'James Cooper', 'standard', 2500.00, 'pacific', 48, '2024-01-01', '9999-12-31', 1),
(11, 'POL-1011', 'Karen Liu', 'premium', 4600.00, 'northeast', 62, '2024-01-01', '9999-12-31', 1),
(12, 'POL-1012', 'Leo Washington', 'basic', 1400.00, 'southeast', 32, '2024-01-01', '9999-12-31', 1),
(13, 'POL-1013', 'Maria Gonzalez', 'standard', 2300.00, 'west', 42, '2024-01-01', '9999-12-31', 1),
(14, 'POL-1014', 'Nathan Brooks', 'platinum', 6800.00, 'midwest', 75, '2024-01-01', '9999-12-31', 1),
(15, 'POL-1015', 'Olivia Thompson', 'premium', 5400.00, 'pacific', 68, '2024-01-01', '9999-12-31', 1);
CREATE DELTA TABLE IF NOT EXISTS demo_zone.delta_demos.policy_changes (
policy_id VARCHAR,
holder_name VARCHAR,
coverage_type VARCHAR,
annual_premium DOUBLE,
region VARCHAR,
risk_score INT,
effective_date VARCHAR
) LOCATION '~/delta-data/policy_changes';
INSERT INTO demo_zone.delta_demos.policy_changes VALUES
('POL-1001', 'Alice Johnson', 'premium', 4200.00, 'northeast', 52, '2025-01-15'),
('POL-1003', 'Carol Chen', 'platinum', 7500.00, 'west', 68, '2025-01-15'),
('POL-1005', 'Elena Rodriguez', 'standard', 2100.00, 'pacific', 35, '2025-01-15'),
('POL-1007', 'Grace Patel', 'standard', 2800.00, 'midwest', 44, '2025-01-15'),
('POL-1009', 'Irene Fischer', 'basic', 1500.00, 'midwest', 30, '2025-01-15'),
('POL-1010', 'James Cooper', 'premium', 4400.00, 'pacific', 55, '2025-01-15'),
('POL-1012', 'Leo Washington', 'standard', 2000.00, 'southeast', 38, '2025-01-15'),
('POL-1014', 'Nathan Brooks', 'platinum', 7100.00, 'midwest', 72, '2025-01-15');
-- QUERIES
-- Pass 1 -- MERGE: expire current rows that have a change in the source
-- ON predicate includes is_current = 1 so we never expire historical rows
MERGE INTO demo_zone.delta_demos.policy_dim AS target
USING demo_zone.delta_demos.policy_changes AS source
ON target.policy_id = source.policy_id AND target.is_current = 1
WHEN MATCHED THEN
UPDATE SET
valid_to = '2025-01-14',
is_current = 0;
-- Pass 2 -- INSERT: add the new current version for each change
-- surrogate_key = 15 + ROW_NUMBER() produces keys 16..23 deterministically
INSERT INTO demo_zone.delta_demos.policy_dim
SELECT 15 + ROW_NUMBER() OVER (ORDER BY policy_id),
policy_id, holder_name, coverage_type, annual_premium, region, risk_score,
effective_date, '9999-12-31', 1
FROM demo_zone.delta_demos.policy_changes;
-- Full history after SCD2 -- 23 rows, two per changed policy
SELECT surrogate_key, policy_id, holder_name, coverage_type,
annual_premium, region, risk_score, valid_from, valid_to, is_current
FROM demo_zone.delta_demos.policy_dim
ORDER BY policy_id, valid_from;
-- Current/expired counts
SELECT CASE WHEN is_current = 1 THEN 'current' ELSE 'expired' END AS record_status,
COUNT(*) AS record_count
FROM demo_zone.delta_demos.policy_dim
GROUP BY is_current
ORDER BY is_current DESC;
-- History trail for POL-1001: standard -> premium
SELECT surrogate_key, policy_id, coverage_type, annual_premium, valid_from, valid_to, is_current
FROM demo_zone.delta_demos.policy_dim
WHERE policy_id = 'POL-1001'
ORDER BY valid_from;
-- Current portfolio summary (is_current = 1 only) -- the reporting view pattern
SELECT coverage_type,
COUNT(*) AS policy_count,
ROUND(SUM(annual_premium), 2) AS total_premium,
ROUND(AVG(annual_premium), 2) AS avg_premium
FROM demo_zone.delta_demos.policy_dim
WHERE is_current = 1
GROUP BY coverage_type
ORDER BY avg_premium;
-- CLEANUP
DROP DELTA TABLE IF EXISTS demo_zone.delta_demos.policy_changes WITH FILES;
DROP DELTA TABLE IF EXISTS demo_zone.delta_demos.policy_dim WITH FILES;
DROP SCHEMA IF EXISTS demo_zone.delta_demos;
DROP ZONE IF EXISTS demo_zone;
## When to Use Use this when you are building regulated dimension tables (insurance, finance, healthcare, HR) that must answer "what did this record look like on date X?" years after the fact. SCD Type 2 solves that by never updating a row in place: instead, each logical change produces two physical rows -- the old one with its valid_to set to yesterday and is_current flipped to 0, plus a new current row with valid_from set to the effective date. Delta MERGE is the natural fit because the target-side UPDATE and the source-side INSERT can be expressed as one idempotent statement chain. The scenario: an insurance policy table gets 8 incoming policy modifications effective 2025-01-15. You expire the 8 active rows and insert 8 new current rows, leaving the 7 untouched policies alone. The final table has 23 rows -- 15 current (7 untouched + 8 new versions) and 8 expired. ## What You Will Learn 1. The standard two-pass SCD2 pattern: MERGE ... WHEN MATCHED THEN UPDATE, followed by INSERT INTO ... SELECT 2. How to match only the active row version by AND-ing policy_id with is_current = 1 in the MERGE ON clause 3. Why the ON clause includes the current-version predicate -- otherwise you would expire historical rows too 4. How to generate surrogate keys for the new-version rows with ROW_NUMBER() OVER (ORDER BY ...) + offset 5. How valid_from / valid_to date ranges build a queryable history without losing any data 6. How to build a "current view" on top of an SCD2 dimension using WHERE is_current = 1 7. How a later INSERT naturally extends the MERGE pattern because Delta MERGE requires a source-keyed matcher
| Name | Type | Description |
|---|---|---|
DIM_PATH | Delta table location for the SCD2 dimension. | |
CHANGES_PATH | Delta table location for the incoming changes batch. | |
EFFECTIVE_DATE | Effective date for the new versions. The expiry of the old version is set to effective_date - 1 day. |