CSV Advanced Options Testbench

Eight CSV external tables, each exercising one parsing option (delimiter, null_value, comment_char, skip_starting_rows, max_rows, trim_whitespace, quote, combined) with data designed so wrong parsing produces obviously wrong results.

Category: csv

Syntax

-- DEMO: CSV Advanced Options Testbench
-- Difficulty: intermediate | Time: ~5 min
-- Requires: 8 small CSV files (01_..08_)

-- ========================================================================
-- SETUP
-- ========================================================================

CREATE ZONE IF NOT EXISTS demo
    TYPE EXTERNAL
    COMMENT 'External tables -- demo datasets and file-backed data';

CREATE SCHEMA IF NOT EXISTS demo.csv_demos
    COMMENT 'CSV-backed external tables';

-- 1. delimiter = '|'
CREATE EXTERNAL TABLE IF NOT EXISTS demo.csv_demos.opt_delimiter
USING CSV
LOCATION '~/delta-data/csv/options/01_pipe_delimited.csv'
OPTIONS (has_header = 'true', delimiter = '|');

-- 2. null_value = 'N/A' -- literal 'N/A' becomes SQL NULL
CREATE EXTERNAL TABLE IF NOT EXISTS demo.csv_demos.opt_null_value
USING CSV
LOCATION '~/delta-data/csv/options/02_null_markers.csv'
OPTIONS (has_header = 'true', null_value = 'N/A');

-- 3. comment_char = '#' -- lines starting with # are skipped before parsing
CREATE EXTERNAL TABLE IF NOT EXISTS demo.csv_demos.opt_comment
USING CSV
LOCATION '~/delta-data/csv/options/03_comment_lines.csv'
OPTIONS (has_header = 'true', comment_char = '#');

-- 4. skip_starting_rows = '3' -- 3 banner lines before the real header
CREATE EXTERNAL TABLE IF NOT EXISTS demo.csv_demos.opt_skip_rows
USING CSV
LOCATION '~/delta-data/csv/options/04_skip_metadata.csv'
OPTIONS (has_header = 'true', skip_starting_rows = '3');

-- 5. max_rows = '5' -- cap rows even though file has 10
CREATE EXTERNAL TABLE IF NOT EXISTS demo.csv_demos.opt_max_rows
USING CSV
LOCATION '~/delta-data/csv/options/05_max_rows.csv'
OPTIONS (has_header = 'true', max_rows = '5');

-- 6. trim_whitespace = 'true' -- strip leading/trailing spaces from values
CREATE EXTERNAL TABLE IF NOT EXISTS demo.csv_demos.opt_trim
USING CSV
LOCATION '~/delta-data/csv/options/06_whitespace.csv'
OPTIONS (has_header = 'true', trim_whitespace = 'true');

-- 7. delimiter = ';' + quote = '"' -- semicolons inside quoted fields kept
CREATE EXTERNAL TABLE IF NOT EXISTS demo.csv_demos.opt_quoted
USING CSV
LOCATION '~/delta-data/csv/options/07_semicolon_quoted.csv'
OPTIONS (has_header = 'true', delimiter = ';', quote = '"');

-- 8. all-of-the-above combined
CREATE EXTERNAL TABLE IF NOT EXISTS demo.csv_demos.opt_combined
USING CSV
LOCATION '~/delta-data/csv/options/08_combined.csv'
OPTIONS (
    has_header = 'true',
    delimiter = '|',
    comment_char = '#',
    null_value = 'N/A',
    trim_whitespace = 'true'
);

-- ========================================================================
-- QUERIES
-- ========================================================================

-- 1. delimiter works -> 5 parsed rows with 4 separate columns.
SELECT id, name, amount, category
FROM demo.csv_demos.opt_delimiter
ORDER BY id;

-- 2. null_value works -> 2 NULL scores + 2 NULL statuses.
SELECT COUNT(*) FILTER (WHERE score  IS NULL) AS null_score_count,
       COUNT(*) FILTER (WHERE status IS NULL) AS null_status_count
FROM demo.csv_demos.opt_null_value;

-- 3. comment_char works -> only 3 real data rows.
SELECT id, sensor, temperature, humidity
FROM demo.csv_demos.opt_comment
ORDER BY id;

-- 4. skip_starting_rows works -> proper columns, 5 rows.
SELECT id, product, warehouse, quantity, unit_cost
FROM demo.csv_demos.opt_skip_rows
ORDER BY id;

-- 5. max_rows works -> exactly 5 rows, first-5 sum = 150.
SELECT COUNT(*) AS row_count,
       SUM(CAST(value AS INT)) AS total_value
FROM demo.csv_demos.opt_max_rows;

-- 6. trim_whitespace works -> 'Alice' is length 5, not 9.
SELECT id, name, LENGTH(name) AS name_len, city, LENGTH(city) AS city_len
FROM demo.csv_demos.opt_trim
WHERE CAST(id AS INT) = 1;

-- 7. quote works -> 4 rows, semicolons inside descriptions kept in one column.
SELECT id, name, description, price
FROM demo.csv_demos.opt_quoted
ORDER BY id;

-- 8. combined options -> 5 rows, 2 NULL scores, names trimmed.
SELECT id, name, LENGTH(name) AS name_len, score, department
FROM demo.csv_demos.opt_combined
ORDER BY CAST(id AS INT);

-- VERIFY: one PASS/FAIL row per option.
SELECT 'delimiter'          AS option_name, CASE WHEN COUNT(*) = 5 THEN 'PASS' ELSE 'FAIL' END AS result FROM demo.csv_demos.opt_delimiter
UNION ALL
SELECT 'null_value',          CASE WHEN COUNT(*) FILTER (WHERE score IS NULL) = 2 THEN 'PASS' ELSE 'FAIL' END FROM demo.csv_demos.opt_null_value
UNION ALL
SELECT 'comment_char',        CASE WHEN COUNT(*) = 3 THEN 'PASS' ELSE 'FAIL' END FROM demo.csv_demos.opt_comment
UNION ALL
SELECT 'skip_starting_rows',  CASE WHEN COUNT(*) = 5 THEN 'PASS' ELSE 'FAIL' END FROM demo.csv_demos.opt_skip_rows
UNION ALL
SELECT 'max_rows',            CASE WHEN COUNT(*) = 5 THEN 'PASS' ELSE 'FAIL' END FROM demo.csv_demos.opt_max_rows
UNION ALL
SELECT 'trim_whitespace',     CASE WHEN COUNT(*) FILTER (WHERE name = 'Alice') = 1 THEN 'PASS' ELSE 'FAIL' END FROM demo.csv_demos.opt_trim
UNION ALL
SELECT 'semicolon_quoted',    CASE WHEN COUNT(*) = 4 THEN 'PASS' ELSE 'FAIL' END FROM demo.csv_demos.opt_quoted
UNION ALL
SELECT 'combined_options',   CASE WHEN COUNT(*) = 5 AND COUNT(*) FILTER (WHERE score IS NULL) = 2 THEN 'PASS' ELSE 'FAIL' END FROM demo.csv_demos.opt_combined
ORDER BY option_name;

-- ========================================================================
-- CLEANUP
-- ========================================================================

DROP EXTERNAL TABLE IF EXISTS demo.csv_demos.opt_delimiter  WITH FILES;
DROP EXTERNAL TABLE IF EXISTS demo.csv_demos.opt_null_value WITH FILES;
DROP EXTERNAL TABLE IF EXISTS demo.csv_demos.opt_comment    WITH FILES;
DROP EXTERNAL TABLE IF EXISTS demo.csv_demos.opt_skip_rows  WITH FILES;
DROP EXTERNAL TABLE IF EXISTS demo.csv_demos.opt_max_rows   WITH FILES;
DROP EXTERNAL TABLE IF EXISTS demo.csv_demos.opt_trim       WITH FILES;
DROP EXTERNAL TABLE IF EXISTS demo.csv_demos.opt_quoted     WITH FILES;
DROP EXTERNAL TABLE IF EXISTS demo.csv_demos.opt_combined   WITH FILES;
DROP SCHEMA IF EXISTS demo.csv_demos;
DROP ZONE IF EXISTS demo;

Description

## When to Use Use this demo when you need a reference for every CSV parsing option DeltaForge exposes through `CREATE EXTERNAL TABLE ... USING CSV OPTIONS (...)`. It is also the canonical regression test for the GUI's Advanced CSV Options panel: each option is wired to its own table over a purpose-built input file, so if the option is not plumbed end-to-end, the verification query returns the wrong row count or the wrong value. Cover the delimiters you will actually meet in the wild: pipe, semicolon (with embedded semicolons in quoted fields), and comma. Exercise the parser's robustness to null markers (`N/A`), banner/metadata lines at the top of a file, comment lines starting with `#`, leading/trailing whitespace, and row caps (`max_rows`). A final combined table proves that multiple options compose correctly in a single table definition. ## What You Will Learn 1. How `delimiter`, `quote`, `null_value`, `comment_char`, `skip_starting_rows`, `max_rows`, and `trim_whitespace` flow into the CSV reader. 2. How `has_header` interacts with `skip_starting_rows` (skip is counted BEFORE the header is read). 3. How to combine multiple options on one table without them interfering. 4. How to build a PASS/FAIL summary query with UNION ALL so a human or CI job can see at a glance which options are wired. 5. Which options are recognised by the GUI but not yet backed by the engine (listed in the demo README), so you do not waste time on them. ## Prerequisites Requires eight small CSV files in `demos/csv/csv-options-testbench/data/` (01_pipe_delimited.csv through 08_combined.csv). Point `data_path` at that directory. No prior demo state is required.

Parameters

NameTypeDescription
data_pathDirectory containing the eight purpose-built CSV option-test files (01_..08_).
zone_nameZone where the csv_demos schema is created.

Pitfalls

See Also

Open in interactive docs →   DeltaForge home →