Classic Northwind sample database as 11 semicolon-delimited CSV external tables (830 orders, 2,155 line items) -- joins, aggregations, shipping analysis, territory coverage, and cross-table revenue reports.
-- DEMO: Northwind Trading Company
-- Difficulty: beginner | Time: ~5 min
-- Requires: 11 semicolon-delimited CSV files
-- ========================================================================
-- SETUP
-- ========================================================================
CREATE ZONE IF NOT EXISTS demo
TYPE EXTERNAL
COMMENT 'External tables -- demo datasets and file-backed data';
CREATE SCHEMA IF NOT EXISTS demo.csv_demos
COMMENT 'CSV-backed external tables';
-- All 11 Northwind tables are semicolon-delimited with a header row.
CREATE EXTERNAL TABLE IF NOT EXISTS demo.csv_demos.nw_customers
USING CSV LOCATION '~/delta-data/csv/northwind/customers.csv'
OPTIONS (header = 'true', delimiter = ';');
CREATE EXTERNAL TABLE IF NOT EXISTS demo.csv_demos.nw_employees
USING CSV LOCATION '~/delta-data/csv/northwind/employees.csv'
OPTIONS (header = 'true', delimiter = ';');
CREATE EXTERNAL TABLE IF NOT EXISTS demo.csv_demos.nw_orders
USING CSV LOCATION '~/delta-data/csv/northwind/orders.csv'
OPTIONS (header = 'true', delimiter = ';');
CREATE EXTERNAL TABLE IF NOT EXISTS demo.csv_demos.nw_order_details
USING CSV LOCATION '~/delta-data/csv/northwind/order_details.csv'
OPTIONS (header = 'true', delimiter = ';');
CREATE EXTERNAL TABLE IF NOT EXISTS demo.csv_demos.nw_products
USING CSV LOCATION '~/delta-data/csv/northwind/products.csv'
OPTIONS (header = 'true', delimiter = ';');
CREATE EXTERNAL TABLE IF NOT EXISTS demo.csv_demos.nw_categories
USING CSV LOCATION '~/delta-data/csv/northwind/categories.csv'
OPTIONS (header = 'true', delimiter = ';');
CREATE EXTERNAL TABLE IF NOT EXISTS demo.csv_demos.nw_suppliers
USING CSV LOCATION '~/delta-data/csv/northwind/suppliers.csv'
OPTIONS (header = 'true', delimiter = ';');
CREATE EXTERNAL TABLE IF NOT EXISTS demo.csv_demos.nw_shippers
USING CSV LOCATION '~/delta-data/csv/northwind/shippers.csv'
OPTIONS (header = 'true', delimiter = ';');
CREATE EXTERNAL TABLE IF NOT EXISTS demo.csv_demos.nw_regions
USING CSV LOCATION '~/delta-data/csv/northwind/regions.csv'
OPTIONS (header = 'true', delimiter = ';');
CREATE EXTERNAL TABLE IF NOT EXISTS demo.csv_demos.nw_territories
USING CSV LOCATION '~/delta-data/csv/northwind/territories.csv'
OPTIONS (header = 'true', delimiter = ';');
CREATE EXTERNAL TABLE IF NOT EXISTS demo.csv_demos.nw_employee_territories
USING CSV LOCATION '~/delta-data/csv/northwind/employee_territories.csv'
OPTIONS (header = 'true', delimiter = ';');
-- ========================================================================
-- QUERIES
-- ========================================================================
-- 1. Top 10 customers by total order value (customers -> orders -> order_details).
SELECT
c.company_name,
COUNT(DISTINCT o.order_id) AS order_count,
ROUND(SUM(CAST(od.unit_price AS NUMERIC) * CAST(od.quantity AS INT) * (1 - CAST(od.discount AS NUMERIC))), 2) AS total_value
FROM demo.csv_demos.nw_customers c
JOIN demo.csv_demos.nw_orders o ON c.customer_id = o.customer_id
JOIN demo.csv_demos.nw_order_details od ON o.order_id = od.order_id
GROUP BY c.company_name
ORDER BY total_value DESC
LIMIT 10;
-- 2. Revenue by product category (order_details -> products -> categories).
SELECT
cat.category_name,
COUNT(DISTINCT p.product_id) AS product_count,
ROUND(SUM(CAST(od.unit_price AS NUMERIC) * CAST(od.quantity AS INT) * (1 - CAST(od.discount AS NUMERIC))), 2) AS total_revenue
FROM demo.csv_demos.nw_order_details od
JOIN demo.csv_demos.nw_products p ON od.product_id = p.product_id
JOIN demo.csv_demos.nw_categories cat ON p.category_id = cat.category_id
GROUP BY cat.category_name
ORDER BY total_revenue DESC;
-- 3. Employee sales performance.
SELECT
e.first_name || ' ' || e.last_name AS employee_name,
e.title,
COUNT(DISTINCT o.order_id) AS orders_handled,
ROUND(SUM(CAST(od.unit_price AS NUMERIC) * CAST(od.quantity AS INT) * (1 - CAST(od.discount AS NUMERIC))), 2) AS total_sales
FROM demo.csv_demos.nw_employees e
JOIN demo.csv_demos.nw_orders o ON e.employee_id = o.employee_id
JOIN demo.csv_demos.nw_order_details od ON o.order_id = od.order_id
GROUP BY e.first_name, e.last_name, e.title
ORDER BY total_sales DESC;
-- 4. Monthly freight trend across the 23-month series.
SELECT
EXTRACT(YEAR FROM o.order_date) AS year,
EXTRACT(MONTH FROM o.order_date) AS month,
COUNT(*) AS order_count,
ROUND(SUM(CAST(o.freight AS NUMERIC)), 2) AS total_freight
FROM demo.csv_demos.nw_orders o
GROUP BY year, month
ORDER BY year, month;
-- 5. Products below reorder level (still active).
SELECT
p.product_name,
cat.category_name,
s.company_name AS supplier,
p.units_in_stock,
p.reorder_level,
p.units_on_order
FROM demo.csv_demos.nw_products p
JOIN demo.csv_demos.nw_categories cat ON p.category_id = cat.category_id
JOIN demo.csv_demos.nw_suppliers s ON p.supplier_id = s.supplier_id
WHERE CAST(p.units_in_stock AS INT) < CAST(p.reorder_level AS INT)
AND CAST(p.discontinued AS INT) = 0
ORDER BY (CAST(p.reorder_level AS INT) - CAST(p.units_in_stock AS INT)) DESC;
-- 6. Shipping analysis by carrier.
SELECT
sh.company_name AS shipper,
COUNT(DISTINCT o.order_id) AS shipments,
ROUND(AVG(CAST(o.freight AS NUMERIC)), 2) AS avg_freight,
ROUND(SUM(CAST(od.unit_price AS NUMERIC) * CAST(od.quantity AS INT) * (1 - CAST(od.discount AS NUMERIC))), 2) AS total_order_value
FROM demo.csv_demos.nw_orders o
JOIN demo.csv_demos.nw_shippers sh ON o.ship_via = sh.shipper_id
JOIN demo.csv_demos.nw_order_details od ON o.order_id = od.order_id
GROUP BY sh.company_name
ORDER BY shipments DESC;
-- 7. Employee territory coverage (4-way JOIN).
SELECT
e.first_name || ' ' || e.last_name AS employee_name,
r.region_description AS region,
COUNT(t.territory_id) AS territory_count
FROM demo.csv_demos.nw_employees e
JOIN demo.csv_demos.nw_employee_territories et ON e.employee_id = et.employee_id
JOIN demo.csv_demos.nw_territories t ON et.territory_id = t.territory_id
JOIN demo.csv_demos.nw_regions r ON t.region_id = r.region_id
GROUP BY e.first_name, e.last_name, r.region_description
ORDER BY employee_name, region;
-- 8. Late shipments -- shipped after required date.
SELECT
o.order_id,
c.company_name,
o.order_date,
o.required_date,
o.shipped_date
FROM demo.csv_demos.nw_orders o
JOIN demo.csv_demos.nw_customers c ON o.customer_id = c.customer_id
WHERE o.shipped_date > o.required_date
ORDER BY o.shipped_date DESC;
-- VERIFY: grand totals across the whole dataset.
SELECT
COUNT(DISTINCT o.order_id) AS total_orders,
COUNT(DISTINCT c.customer_id) AS total_customers,
ROUND(SUM(CAST(od.unit_price AS NUMERIC) * CAST(od.quantity AS INT) * (1 - CAST(od.discount AS NUMERIC))), 2) AS grand_total_revenue
FROM demo.csv_demos.nw_order_details od
JOIN demo.csv_demos.nw_orders o ON od.order_id = o.order_id
JOIN demo.csv_demos.nw_customers c ON o.customer_id = c.customer_id;
-- ========================================================================
-- CLEANUP -- drop children before parents
-- ========================================================================
DROP EXTERNAL TABLE IF EXISTS demo.csv_demos.nw_employee_territories WITH FILES;
DROP EXTERNAL TABLE IF EXISTS demo.csv_demos.nw_territories WITH FILES;
DROP EXTERNAL TABLE IF EXISTS demo.csv_demos.nw_regions WITH FILES;
DROP EXTERNAL TABLE IF EXISTS demo.csv_demos.nw_shippers WITH FILES;
DROP EXTERNAL TABLE IF EXISTS demo.csv_demos.nw_suppliers WITH FILES;
DROP EXTERNAL TABLE IF EXISTS demo.csv_demos.nw_categories WITH FILES;
DROP EXTERNAL TABLE IF EXISTS demo.csv_demos.nw_products WITH FILES;
DROP EXTERNAL TABLE IF EXISTS demo.csv_demos.nw_order_details WITH FILES;
DROP EXTERNAL TABLE IF EXISTS demo.csv_demos.nw_orders WITH FILES;
DROP EXTERNAL TABLE IF EXISTS demo.csv_demos.nw_employees WITH FILES;
DROP EXTERNAL TABLE IF EXISTS demo.csv_demos.nw_customers WITH FILES;
DROP SCHEMA IF EXISTS demo.csv_demos;
DROP ZONE IF EXISTS demo;
## When to Use Use this demo when you want a realistic, normalised, multi-table CSV dataset to practise joins, aggregations, and business analytics. Northwind is the canonical specialty-food trading company fixture used across SQL tutorials for 25+ years, shipped here as eleven CSV files that DeltaForge loads as plain external tables. Every query is a cross-table JOIN over `customers`, `orders`, `order_details`, `products`, `categories`, `suppliers`, `shippers`, `employees`, `employee_territories`, `territories`, and `regions`. The files are semicolon-delimited (not comma) -- a common real-world variation in European-export CSV -- so this demo also doubles as a practical example of `OPTIONS (delimiter = ';')`. ## What You Will Learn 1. How to register many CSV files as external tables in one script using the same zone/schema pattern. 2. How to read semicolon-delimited CSVs with `OPTIONS (header = 'true', delimiter = ';')`. 3. How to write three- and four-way JOINs across CSV-backed tables (customers-orders-order_details, employees-employee_territories-territories-regions). 4. How to aggregate revenue using `SUM(unit_price * quantity * (1 - discount))` with `CAST`s (CSV columns are strings). 5. How to compute revenue-by-category, revenue-by-supplier, employee sales performance, monthly freight trends, and late-shipment reports. 6. How an `ASSERT ... WHERE` clause pins specific expected values to make the script a regression test rather than a demo. ## Prerequisites Requires the eleven CSV files in `demos/csv/northwind-database/data/` (`customers.csv`, `orders.csv`, `order_details.csv`, `products.csv`, `categories.csv`, `suppliers.csv`, `shippers.csv`, `employees.csv`, `employee_territories.csv`, `territories.csv`, `regions.csv`). All files use `;` as the delimiter and UTF-8 encoding. Point `data_path` at the directory that contains them.
| Name | Type | Description |
|---|---|---|
data_path | Directory containing the eleven semicolon-delimited Northwind CSV files. | |
zone_name | Zone where the csv_demos schema is created. |