Add new columns to a live Delta table with zero data rewrite, rely on implicit NULL fill for historical rows, and backfill only the records that matter.
-- SETUP
CREATE ZONE IF NOT EXISTS demo_zone TYPE EXTERNAL
COMMENT 'External and Delta tables -- demo datasets';
CREATE SCHEMA IF NOT EXISTS demo_zone.delta_demos
COMMENT 'Delta table management tutorial demos';
CREATE DELTA TABLE IF NOT EXISTS demo_zone.delta_demos.contacts (
id INT,
first_name VARCHAR,
last_name VARCHAR,
email VARCHAR
) LOCATION '~/delta-data/contacts';
INSERT INTO demo_zone.delta_demos.contacts VALUES
(1, 'Alice', 'Johnson', 'alice.johnson@example.com'),
(2, 'Bob', 'Smith', 'bob.smith@example.com'),
(3, 'Carol', 'Williams', 'carol.williams@example.com'),
(4, 'David', 'Brown', 'david.brown@example.com'),
(5, 'Eve', 'Davis', 'eve.davis@example.com'),
(6, 'Frank', 'Miller', 'frank.miller@example.com'),
(7, 'Grace', 'Wilson', 'grace.wilson@example.com'),
(8, 'Henry', 'Moore', 'henry.moore@example.com'),
(9, 'Irene', 'Taylor', 'irene.taylor@example.com'),
(10, 'Jack', 'Anderson', 'jack.anderson@example.com'),
(11, 'Karen', 'Thomas', 'karen.thomas@example.com'),
(12, 'Leo', 'Jackson', 'leo.jackson@example.com'),
(13, 'Maria', 'White', 'maria.white@example.com'),
(14, 'Nathan', 'Harris', 'nathan.harris@example.com'),
(15, 'Olivia', 'Martin', 'olivia.martin@example.com'),
(16, 'Paul', 'Garcia', 'paul.garcia@example.com'),
(17, 'Quinn', 'Martinez', 'quinn.martinez@example.com'),
(18, 'Rachel', 'Robinson', 'rachel.robinson@example.com'),
(19, 'Sam', 'Clark', 'sam.clark@example.com'),
(20, 'Tina', 'Lewis', 'tina.lewis@example.com'),
(21, 'Uma', 'Lee', 'uma.lee@example.com'),
(22, 'Victor', 'Walker', 'victor.walker@example.com'),
(23, 'Wendy', 'Hall', 'wendy.hall@example.com'),
(24, 'Xander', 'Allen', 'xander.allen@example.com'),
(25, 'Yolanda', 'Young', 'yolanda.young@example.com'),
(26, 'Zach', 'King', 'zach.king@example.com'),
(27, 'Amber', 'Wright', 'amber.wright@example.com'),
(28, 'Brian', 'Lopez', 'brian.lopez@example.com'),
(29, 'Cindy', 'Hill', 'cindy.hill@example.com'),
(30, 'Derek', 'Scott', 'derek.scott@example.com');
-- QUERIES
-- Baseline: 4 columns, 30 rows
SELECT id, first_name, last_name, email
FROM demo_zone.delta_demos.contacts
WHERE id IN (1, 15, 30)
ORDER BY id;
-- PHASE 1: evolve schema -- metadata-only, zero data rewrite
ALTER TABLE demo_zone.delta_demos.contacts ADD COLUMN phone VARCHAR;
ALTER TABLE demo_zone.delta_demos.contacts ADD COLUMN city VARCHAR;
ALTER TABLE demo_zone.delta_demos.contacts ADD COLUMN signup_date VARCHAR;
-- Old files still exist with 4 columns; the scan returns NULL for the new 3
SELECT id, first_name, phone, city, signup_date
FROM demo_zone.delta_demos.contacts
WHERE id IN (1, 15, 30)
ORDER BY id;
-- PHASE 2: new inserts write all 7 columns physically
INSERT INTO demo_zone.delta_demos.contacts VALUES
(31, 'Elena', 'Foster', 'elena.foster@example.com', '+1-555-0131', 'New York', '2024-03-01'),
(32, 'Felix', 'Reed', 'felix.reed@example.com', '+1-555-0132', 'Los Angeles', '2024-03-02'),
(33, 'Gina', 'Cook', 'gina.cook@example.com', '+1-555-0133', 'Chicago', '2024-03-03'),
(34, 'Hugo', 'Morgan', 'hugo.morgan@example.com', '+1-555-0134', 'Houston', '2024-03-04'),
(35, 'Isla', 'Bell', 'isla.bell@example.com', '+1-555-0135', 'Phoenix', '2024-03-05'),
(36, 'Jake', 'Murphy', 'jake.murphy@example.com', '+1-555-0136', 'San Antonio', '2024-03-06'),
(37, 'Kara', 'Rivera', 'kara.rivera@example.com', '+1-555-0137', 'San Diego', '2024-03-07'),
(38, 'Liam', 'Cooper', 'liam.cooper@example.com', '+1-555-0138', 'Dallas', '2024-03-08'),
(39, 'Mia', 'Bailey', 'mia.bailey@example.com', '+1-555-0139', 'San Jose', '2024-03-09'),
(40, 'Noah', 'Howard', 'noah.howard@example.com', '+1-555-0140', 'Austin', '2024-03-10');
-- PHASE 3: backfill phone for the first 10 original rows -- rewrites only those files
UPDATE demo_zone.delta_demos.contacts SET phone = '+1-555-0101' WHERE id = 1;
UPDATE demo_zone.delta_demos.contacts SET phone = '+1-555-0102' WHERE id = 2;
UPDATE demo_zone.delta_demos.contacts SET phone = '+1-555-0103' WHERE id = 3;
UPDATE demo_zone.delta_demos.contacts SET phone = '+1-555-0104' WHERE id = 4;
UPDATE demo_zone.delta_demos.contacts SET phone = '+1-555-0105' WHERE id = 5;
UPDATE demo_zone.delta_demos.contacts SET phone = '+1-555-0106' WHERE id = 6;
UPDATE demo_zone.delta_demos.contacts SET phone = '+1-555-0107' WHERE id = 7;
UPDATE demo_zone.delta_demos.contacts SET phone = '+1-555-0108' WHERE id = 8;
UPDATE demo_zone.delta_demos.contacts SET phone = '+1-555-0109' WHERE id = 9;
UPDATE demo_zone.delta_demos.contacts SET phone = '+1-555-0110' WHERE id = 10;
-- The three natural row groups after evolution
SELECT
CASE
WHEN id BETWEEN 1 AND 10 THEN 'Group 1: Original + backfilled phone'
WHEN id BETWEEN 11 AND 30 THEN 'Group 2: Original, no backfill'
ELSE 'Group 3: Post-evolution inserts'
END AS row_group,
COUNT(*) AS row_count,
COUNT(phone) AS has_phone,
COUNT(city) AS has_city,
COUNT(signup_date) AS has_signup_date
FROM demo_zone.delta_demos.contacts
GROUP BY CASE
WHEN id BETWEEN 1 AND 10 THEN 'Group 1: Original + backfilled phone'
WHEN id BETWEEN 11 AND 30 THEN 'Group 2: Original, no backfill'
ELSE 'Group 3: Post-evolution inserts'
END
ORDER BY row_group;
-- CLEANUP
DROP DELTA TABLE IF EXISTS demo_zone.delta_demos.contacts WITH FILES;
DROP SCHEMA IF EXISTS demo_zone.delta_demos;
DROP ZONE IF EXISTS demo_zone;
## When to Use When the data model grows (new CRM fields, new regulatory attributes, new product flags) but you cannot afford to recreate the table. Delta treats ALTER TABLE ADD COLUMN as a metadata-only edit on the transaction log: old Parquet files are left untouched, and the scan layer returns NULL for columns that are missing from a given file. Future INSERTs write the evolved schema directly. This demo shows the three row groups that naturally emerge in production: original + selective backfill, original + no backfill, and fully-populated post-evolution writes. ## What You Will Learn 1. Why ALTER TABLE ADD COLUMN is O(1) regardless of table size -- it updates metadata, not data files 2. How Delta fills NULL at read time for files that predate a column 3. How post-evolution INSERTs produce Parquet files that contain the new columns physically 4. How selective UPDATE backfills only the rows that need new values -- and how that rewrites only those files 5. The three natural row groups after evolution (original NULL, original backfilled, post-evolution populated) 6. How COUNT(col) vs COUNT(*) reveals which columns are populated per row group 7. That NULL fill is a read-time operation, so adding many columns is cheap but wide tables pay at query time ## Prerequisites None. The setup seeds 30 rows against the original 4-column schema, and the queries drive three evolution phases.
| Name | Type | Description |
|---|---|---|
TABLE_PATH | Filesystem or object-store location for the evolving Delta table. |