From c5bbeaa63ee460215c77342d89ad22c595ef0c91 Mon Sep 17 00:00:00 2001 From: "Victor M. Varela" Date: Fri, 8 May 2026 12:03:20 +0200 Subject: [PATCH 1/6] feat: introduce format.zig with InputFormat, OutputFormat, and OutputWriter Extracts InputFormat and OutputFormat enums (with parse() methods) and the writeField CSV helper from main.zig into a new src/format.zig module. Introduces OutputWriter, a stateful struct that dispatches output formatting across all five formats (csv, tsv, json, ndjson, xml), eliminating the format-switch inside execQuery. Closes no issue yet; part of #145. --- src/format.zig | 276 +++++++++++++++++++++++++++++++++++++++++++++++++ src/main.zig | 213 +++++--------------------------------- 2 files changed, 303 insertions(+), 186 deletions(-) create mode 100644 src/format.zig diff --git a/src/format.zig b/src/format.zig new file mode 100644 index 0000000..fdbed0a --- /dev/null +++ b/src/format.zig @@ -0,0 +1,276 @@ +//! Format abstraction — input/output format types and the OutputWriter. +//! +//! This module owns: +//! InputFormat — supported input formats, with parse() +//! OutputFormat — supported output formats, with parse() +//! LoadOpts — common options forwarded to input-format loaders +//! WriteOpts — options forwarded to OutputWriter +//! OutputWriter — stateful writer that dispatches on OutputFormat +//! writeField — RFC 4180 CSV field writer (used by OutputWriter and --sample mode) + +const std = @import("std"); +const c = @import("c"); +const json_mod = @import("json.zig"); +const xml_mod = @import("xml.zig"); + +// ─── Input format ────────────────────────────────────── + +/// Supported input formats. +pub const InputFormat = enum { + csv, + tsv, + json, + ndjson, + xml, + + /// Parse a format name string. + /// Returns error.InvalidInputFormat when the value is unrecognised. 
+ pub fn parse(s: []const u8) error{InvalidInputFormat}!InputFormat { + if (std.mem.eql(u8, s, "csv")) return .csv; + if (std.mem.eql(u8, s, "tsv")) return .tsv; + if (std.mem.eql(u8, s, "json")) return .json; + if (std.mem.eql(u8, s, "ndjson")) return .ndjson; + if (std.mem.eql(u8, s, "xml")) return .xml; + return error.InvalidInputFormat; + } +}; + +// ─── Output format ───────────────────────────────────── + +/// Supported output formats. +pub const OutputFormat = enum { + csv, + tsv, + json, + ndjson, + xml, + + /// Parse a format name string. + /// Returns error.InvalidOutputFormat when the value is unrecognised. + pub fn parse(s: []const u8) error{InvalidOutputFormat}!OutputFormat { + if (std.mem.eql(u8, s, "csv")) return .csv; + if (std.mem.eql(u8, s, "tsv")) return .tsv; + if (std.mem.eql(u8, s, "json")) return .json; + if (std.mem.eql(u8, s, "ndjson")) return .ndjson; + if (std.mem.eql(u8, s, "xml")) return .xml; + return error.InvalidOutputFormat; + } +}; + +// ─── Load options ─────────────────────────────────────── + +/// Options forwarded to input-format loaders. +pub const LoadOpts = struct { + /// Abort if more than this many data rows are read; null = unlimited. + max_rows: ?usize = null, + /// CSV/TSV field delimiter (1–8 bytes). + delimiter: []const u8 = ",", + /// Infer INTEGER/REAL column types from the first 100 rows (CSV/TSV only). + type_inference: bool = true, + /// Root element to navigate to for XML input; null = actual document root. + xml_root: ?[]const u8 = null, + /// Row tag filter for XML input; null = any direct child element. + xml_row: ?[]const u8 = null, +}; + +// ─── Write options ────────────────────────────────────── + +/// Options forwarded to OutputWriter. +pub const WriteOpts = struct { + /// Emit column names as the first row (CSV/TSV output only). + header: bool = false, + /// Root element name for XML output. + xml_root: []const u8 = "results", + /// Row element name for XML output. 
+ xml_row: []const u8 = "row", +}; + +// ─── Output writer ────────────────────────────────────── + +/// Stateful writer that formats SQLite result rows in any supported output format. +/// +/// Usage: +/// var w = OutputWriter.init(format, opts); +/// defer w.deinit(allocator); +/// try w.begin(allocator, stmt, col_count, writer); +/// while (sqlite3_step(stmt) == SQLITE_ROW) try w.writeRow(stmt, writer); +/// try w.end(writer); +pub const OutputWriter = struct { + format: OutputFormat, + opts: WriteOpts, + /// Set to false after the first writeRow call; controls JSON comma placement. + first_row: bool, + /// Slice of column-name pointers borrowed from SQLite (valid until stmt is finalized). + /// Allocated in begin(); freed in deinit(). + col_names: []const [*:0]const u8, + col_count: c_int, + + /// Create a new OutputWriter. Call begin() before the first writeRow(). + pub fn init(format: OutputFormat, opts: WriteOpts) OutputWriter { + return .{ + .format = format, + .opts = opts, + .first_row = true, + .col_names = &.{}, + .col_count = 0, + }; + } + + /// Release any memory allocated during begin(). + /// Safe to call even when begin() was never called. + pub fn deinit(self: *OutputWriter, allocator: std.mem.Allocator) void { + if (self.col_names.len > 0) { + allocator.free(self.col_names); + } + self.* = undefined; + } + + /// Write any format preamble and collect column metadata. + /// + /// JSON: writes '[' + /// XML: writes the XML declaration and opening root element + /// CSV/TSV: writes an optional header row (when opts.header = true) + /// + /// Pre: stmt is a valid prepared statement; col_count = sqlite3_column_count(stmt) + pub fn begin( + self: *OutputWriter, + allocator: std.mem.Allocator, + stmt: *c.sqlite3_stmt, + col_count: c_int, + writer: *std.Io.Writer, + ) !void { + self.col_count = col_count; + + // Collect column-name pointers for formats that need them per row. 
+ switch (self.format) { + .json, .ndjson, .xml => { + const names = try allocator.alloc([*:0]const u8, @intCast(col_count)); + var i: c_int = 0; + while (i < col_count) : (i += 1) { + names[@intCast(i)] = c.sqlite3_column_name(stmt, i); + } + self.col_names = names; + }, + .csv, .tsv => { + if (self.opts.header and col_count > 0) + try csvPrintHeaderRow(stmt, col_count, writer, self.csvDelimiter()); + }, + } + + // Write format-specific preamble. + switch (self.format) { + .json => try writer.writeByte('['), + .xml => try xml_mod.writeXmlHeader(writer, self.opts.xml_root), + else => {}, + } + } + + /// Write the current SQLITE_ROW to writer. + /// + /// Pre: sqlite3_step(stmt) just returned SQLITE_ROW; begin() has been called + pub fn writeRow( + self: *OutputWriter, + stmt: *c.sqlite3_stmt, + writer: *std.Io.Writer, + ) !void { + switch (self.format) { + .json => { + try json_mod.printJsonRow(stmt, self.col_count, self.col_names, writer, self.first_row); + self.first_row = false; + }, + .ndjson => try json_mod.printNdjsonRow(stmt, self.col_count, self.col_names, writer), + .csv, .tsv => try csvPrintRow(stmt, self.col_count, writer, self.csvDelimiter()), + .xml => try xml_mod.writeXmlRow( + stmt, + self.col_count, + self.col_names, + writer, + self.opts.xml_row, + ), + } + } + + /// Write any format epilogue. + /// + /// JSON: writes ']\n' + /// XML: writes the closing root element + pub fn end(self: *OutputWriter, writer: *std.Io.Writer) !void { + switch (self.format) { + .json => try writer.writeAll("]\n"), + .xml => try xml_mod.writeXmlFooter(writer, self.opts.xml_root), + else => {}, + } + } + + fn csvDelimiter(self: OutputWriter) []const u8 { + return if (self.format == .tsv) "\t" else ","; + } +}; + +// ── CSV output helpers ───────────────────────────────────────────────────────── + +/// Write a single CSV/TSV field with RFC 4180 quoting when necessary. 
+/// +/// Pre: value is a valid UTF-8 slice; delimiter is the field separator string +/// Post: if value contains delimiter, '"', '\n', or '\r', it is enclosed in +/// double-quotes with internal double-quotes doubled; otherwise written verbatim +pub fn writeField(writer: *std.Io.Writer, value: []const u8, delimiter: []const u8) !void { + const needs_quoting = std.mem.indexOf(u8, value, delimiter) != null or + std.mem.indexOfAny(u8, value, "\"\n\r") != null; + if (needs_quoting) { + try writer.writeByte('"'); + for (value) |ch| { + if (ch == '"') try writer.writeByte('"'); + try writer.writeByte(ch); + } + try writer.writeByte('"'); + } else { + try writer.writeAll(value); + } +} + +/// Write one delimited output row from the current SQLITE_ROW. +fn csvPrintRow( + stmt: *c.sqlite3_stmt, + col_count: c_int, + writer: *std.Io.Writer, + delimiter: []const u8, +) !void { + // Loop invariant I: columns 0..i-1 have been written, separated by delimiter + // Bounding function: col_count - i + var i: c_int = 0; + while (i < col_count) : (i += 1) { + if (i > 0) try writer.writeAll(delimiter); + if (c.sqlite3_column_type(stmt, i) == c.SQLITE_NULL) { + try writer.writeAll("NULL"); + } else { + const ptr = c.sqlite3_column_text(stmt, i); + if (ptr != null) { + try writeField(writer, std.mem.span(@as([*:0]const u8, @ptrCast(ptr))), delimiter); + } else { + try writer.writeAll("NULL"); + } + } + } + try writer.writeByte('\n'); +} + +/// Write a header row with column names from the prepared statement. 
+fn csvPrintHeaderRow( + stmt: *c.sqlite3_stmt, + col_count: c_int, + writer: *std.Io.Writer, + delimiter: []const u8, +) !void { + // Loop invariant I: columns 0..i-1 names have been written, separated by delimiter + // Bounding function: col_count - i + var i: c_int = 0; + while (i < col_count) : (i += 1) { + if (i > 0) try writer.writeAll(delimiter); + const name_ptr = c.sqlite3_column_name(stmt, i); + if (name_ptr != null) { + try writeField(writer, std.mem.span(@as([*:0]const u8, @ptrCast(name_ptr))), delimiter); + } + } + try writer.writeByte('\n'); +} diff --git a/src/main.zig b/src/main.zig index 086d36e..cf6aba2 100644 --- a/src/main.zig +++ b/src/main.zig @@ -3,6 +3,7 @@ const c = @import("c"); const csv = @import("csv.zig"); const json = @import("json.zig"); const xml = @import("xml.zig"); +const format = @import("format.zig"); const build_options = @import("build_options"); const VERSION: []const u8 = build_options.version; @@ -72,11 +73,11 @@ const ExitCode = enum(u8) { sql_error = 3, }; -/// Supported input formats. -const InputFormat = enum { csv, tsv, json, ndjson, xml }; +/// Supported input formats (canonical definition lives in format.zig). +const InputFormat = format.InputFormat; -/// Supported output formats. -const OutputFormat = enum { csv, tsv, json, ndjson, xml }; +/// Supported output formats (canonical definition lives in format.zig). +const OutputFormat = format.OutputFormat; /// Parsed command-line arguments. 
const ParsedArgs = struct { @@ -239,32 +240,6 @@ fn parseDelimiter(value: []const u8) SqlPipeError![]const u8 { return value; } -/// parseInputFormat(s) → InputFormat -/// Pre: s is the format string provided by the user -/// Post: result is the matching InputFormat -/// error.InvalidInputFormat when s is not "csv", "tsv", "json", or "ndjson" -fn parseInputFormat(s: []const u8) SqlPipeError!InputFormat { - if (std.mem.eql(u8, s, "csv")) return .csv; - if (std.mem.eql(u8, s, "tsv")) return .tsv; - if (std.mem.eql(u8, s, "json")) return .json; - if (std.mem.eql(u8, s, "ndjson")) return .ndjson; - if (std.mem.eql(u8, s, "xml")) return .xml; - return error.InvalidInputFormat; -} - -/// parseOutputFormat(s) → OutputFormat -/// Pre: s is the format string provided by the user -/// Post: result is the matching OutputFormat -/// error.InvalidOutputFormat when s is not "csv", "tsv", "json", or "ndjson" -fn parseOutputFormat(s: []const u8) SqlPipeError!OutputFormat { - if (std.mem.eql(u8, s, "csv")) return .csv; - if (std.mem.eql(u8, s, "tsv")) return .tsv; - if (std.mem.eql(u8, s, "json")) return .json; - if (std.mem.eql(u8, s, "ndjson")) return .ndjson; - if (std.mem.eql(u8, s, "xml")) return .xml; - return error.InvalidOutputFormat; -} - /// isValidXmlName(s) → bool /// /// Returns true iff s is a valid XML Name: @@ -350,19 +325,19 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult { } else if (std.mem.eql(u8, arg, "-I") or std.mem.eql(u8, arg, "--input-format")) { i += 1; if (i >= args.len) return error.InvalidInputFormat; - input_format = try parseInputFormat(args[i]); + input_format = InputFormat.parse(args[i]) catch return error.InvalidInputFormat; } else if (std.mem.startsWith(u8, arg, "--input-format=")) { - input_format = try parseInputFormat(arg["--input-format=".len..]); + input_format = InputFormat.parse(arg["--input-format=".len..]) catch return error.InvalidInputFormat; } else if (std.mem.startsWith(u8, arg, "-I=")) { - input_format = try 
parseInputFormat(arg["-I=".len..]); + input_format = InputFormat.parse(arg["-I=".len..]) catch return error.InvalidInputFormat; } else if (std.mem.eql(u8, arg, "-O") or std.mem.eql(u8, arg, "--output-format")) { i += 1; if (i >= args.len) return error.InvalidOutputFormat; - output_format = try parseOutputFormat(args[i]); + output_format = OutputFormat.parse(args[i]) catch return error.InvalidOutputFormat; } else if (std.mem.startsWith(u8, arg, "--output-format=")) { - output_format = try parseOutputFormat(arg["--output-format=".len..]); + output_format = OutputFormat.parse(arg["--output-format=".len..]) catch return error.InvalidOutputFormat; } else if (std.mem.startsWith(u8, arg, "-O=")) { - output_format = try parseOutputFormat(arg["-O=".len..]); + output_format = OutputFormat.parse(arg["-O=".len..]) catch return error.InvalidOutputFormat; } else if (std.mem.eql(u8, arg, "--max-rows")) { i += 1; if (i >= args.len) return error.InvalidMaxRows; @@ -862,94 +837,12 @@ fn insertRowTyped( if (c.sqlite3_step(stmt) != c.SQLITE_DONE) return error.StepFailed; } -/// printRow(stmt, col_count, writer, delimiter) → !void -/// Pre: sqlite3_step returned SQLITE_ROW for stmt -/// col_count = sqlite3_column_count(stmt) > 0 -/// delimiter is the field separator string (e.g. 
"," or "\t") -/// Post: one delimited line written to writer with col_count values; -/// NULL cells rendered as the literal string "NULL" -fn printRow( - stmt: *c.sqlite3_stmt, - col_count: c_int, - writer: *std.Io.Writer, - delimiter: []const u8, -) !void { - // Loop invariant I: columns 0..i-1 have been written, separated by delimiter - // Bounding function: col_count - i - var i: c_int = 0; - while (i < col_count) : (i += 1) { - if (i > 0) try writer.writeAll(delimiter); - if (c.sqlite3_column_type(stmt, i) == c.SQLITE_NULL) { - try writer.writeAll("NULL"); - } else { - const ptr = c.sqlite3_column_text(stmt, i); - if (ptr != null) { - try writeField(writer, std.mem.span(@as([*:0]const u8, @ptrCast(ptr))), delimiter); - } else { - try writer.writeAll("NULL"); - } - } - } - try writer.writeByte('\n'); -} - -/// writeField(writer, value, delimiter) → !void -/// Pre: writer is a valid writer, value is a valid UTF-8 slice -/// delimiter is the field separator string (e.g. "," or "\t" or "||") -/// Post: value is written to writer as a single delimited field: -/// if value contains the delimiter string, double-quote, or newline, it is -/// enclosed in double-quotes with internal quotes escaped as "" (RFC 4180); -/// otherwise it is written verbatim -fn writeField(writer: *std.Io.Writer, value: []const u8, delimiter: []const u8) !void { - const needs_quoting = std.mem.indexOf(u8, value, delimiter) != null or - std.mem.indexOfAny(u8, value, "\"\n\r") != null; - if (needs_quoting) { - try writer.writeByte('"'); - for (value) |ch| { - if (ch == '"') try writer.writeByte('"'); - try writer.writeByte(ch); - } - try writer.writeByte('"'); - } else { - try writer.writeAll(value); - } -} - -/// printHeaderRow(stmt, col_count, writer, delimiter) → !void -/// Pre: stmt is a prepared statement, col_count > 0 -/// delimiter is the field separator string (e.g. 
"," or "\t") -/// Post: one delimited line with col_count column names written to writer; -/// names are obtained from sqlite3_column_name (alias or original); -/// fields are RFC 4180 quoted when they contain special characters -fn printHeaderRow( - stmt: *c.sqlite3_stmt, - col_count: c_int, - writer: *std.Io.Writer, - delimiter: []const u8, -) !void { - // Loop invariant I: columns 0..i-1 names have been written, separated by delimiter - // Bounding function: col_count - i - var i: c_int = 0; - while (i < col_count) : (i += 1) { - if (i > 0) try writer.writeAll(delimiter); - const name_ptr = c.sqlite3_column_name(stmt, i); - if (name_ptr != null) { - const name = std.mem.span(@as([*:0]const u8, @ptrCast(name_ptr))); - try writeField(writer, name, delimiter); - } - } - try writer.writeByte('\n'); -} - /// execQuery(db, query, allocator, writer, header, output_format) → !void /// Pre: db is open with table `t` populated /// query is a valid SQL string (not null-terminated) /// allocator is valid /// when output_format = .json or .ndjson, header must not be set (caller's responsibility) -/// Post: if output_format = .json, results are written as a JSON array of objects -/// if output_format = .ndjson, results are written as one JSON object per line -/// if output_format = .csv or .tsv, results are written as delimited text; -/// when header = true, column names are written as the first row +/// Post: results are written to writer in the requested output format /// error.PrepareQueryFailed when sqlite3_prepare_v2 returns non-SQLITE_OK /// propagates any writer I/O error fn execQuery( @@ -961,7 +854,7 @@ fn execQuery( output_format: OutputFormat, xml_root: []const u8, xml_row: []const u8, -) (SqlPipeError || std.mem.Allocator.Error || std.Io.Writer.Error)!void { +) (SqlPipeError || std.mem.Allocator.Error || error{WriteFailed})!void { const query_z = try allocator.dupeZ(u8, query); defer allocator.free(query_z); @@ -972,72 +865,20 @@ fn execQuery( const col_count = 
c.sqlite3_column_count(stmt); - switch (output_format) { - .json => { - // Collect column names before stepping (sqlite3_column_name is valid before step) - var col_names = try allocator.alloc([*:0]const u8, @intCast(col_count)); - defer allocator.free(col_names); - var ci: c_int = 0; - while (ci < col_count) : (ci += 1) { - col_names[@intCast(ci)] = c.sqlite3_column_name(stmt, ci); - } - - try writer.writeByte('['); - var first = true; - // Loop invariant I: all SQLITE_ROW results returned so far have been printed as JSON objects - // Bounding function: number of remaining rows in the result set (finite) - while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { - try json.printJsonRow(stmt.?, col_count, col_names, writer, first); - first = false; - } - try writer.writeAll("]\n"); - }, - .ndjson => { - // Collect column names before stepping - var col_names = try allocator.alloc([*:0]const u8, @intCast(col_count)); - defer allocator.free(col_names); - var ci: c_int = 0; - while (ci < col_count) : (ci += 1) { - col_names[@intCast(ci)] = c.sqlite3_column_name(stmt, ci); - } - // Loop invariant I: all SQLITE_ROW results returned so far have been printed as NDJSON lines - // Bounding function: number of remaining rows in the result set (finite) - while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { - try json.printNdjsonRow(stmt.?, col_count, col_names, writer); - } - }, - .csv, .tsv => { - const out_delim: []const u8 = if (output_format == .tsv) "\t" else ","; - - // When header is requested, print column names before data rows - if (header and col_count > 0) { - try printHeaderRow(stmt.?, col_count, writer, out_delim); - } - - // Loop invariant I: all SQLITE_ROW results returned so far have been printed - // Bounding function: number of remaining rows in the result set (finite) - while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { - try printRow(stmt.?, col_count, writer, out_delim); - } - }, - .xml => { - // Collect column names before stepping - var col_names = try 
allocator.alloc([*:0]const u8, @intCast(col_count)); - defer allocator.free(col_names); - var ci: c_int = 0; - while (ci < col_count) : (ci += 1) { - col_names[@intCast(ci)] = c.sqlite3_column_name(stmt, ci); - } + var out_writer = format.OutputWriter.init(output_format, .{ + .header = header, + .xml_root = xml_root, + .xml_row = xml_row, + }); + defer out_writer.deinit(allocator); - try xml.writeXmlHeader(writer, xml_root); - // Loop invariant I: all SQLITE_ROW results returned so far have been written as XML rows - // Bounding function: number of remaining rows in the result set (finite) - while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { - try xml.writeXmlRow(stmt.?, col_count, col_names, writer, xml_row); - } - try xml.writeXmlFooter(writer, xml_root); - }, + try out_writer.begin(allocator, stmt.?, col_count, writer); + // Loop invariant I: all SQLITE_ROW results returned so far have been written + // Bounding function: number of remaining rows in the result set (finite) + while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { + try out_writer.writeRow(stmt.?, writer); } + try out_writer.end(writer); } // ─── SQL error context helpers ──────────────────────── @@ -1972,7 +1813,7 @@ fn runSample( for (cols, 0..) 
|col, i| { if (i > 0) stdout_writer.writeAll(col_delim) catch fatal("failed to write header", stderr_writer, .csv_error, .{}); - writeField(stdout_writer, col, col_delim) catch + format.writeField(stdout_writer, col, col_delim) catch fatal("failed to write header", stderr_writer, .csv_error, .{}); } stdout_writer.writeByte('\n') catch @@ -1990,7 +1831,7 @@ fn runSample( if (col_idx > 0) stdout_writer.writeAll(col_delim) catch fatal("failed to write field separator", stderr_writer, .csv_error, .{}); const val: []const u8 = if (col_idx < row.len) row[col_idx] else ""; - writeField(stdout_writer, val, col_delim) catch + format.writeField(stdout_writer, val, col_delim) catch fatal("failed to write field", stderr_writer, .csv_error, .{}); } stdout_writer.writeByte('\n') catch From ecfd50065e3bfdedb8459186bc1d41b92fcf627f Mon Sep 17 00:00:00 2001 From: "Victor M. Varela" Date: Fri, 8 May 2026 12:48:05 +0200 Subject: [PATCH 2/6] refactor: extract CLI argument types and parseArgs into args.zig --- src/args.zig | 439 +++++++++++++++++++++++++++++++++++++++++++++ src/main.zig | 496 +++------------------------------------------------ 2 files changed, 461 insertions(+), 474 deletions(-) create mode 100644 src/args.zig diff --git a/src/args.zig b/src/args.zig new file mode 100644 index 0000000..c8e2c47 --- /dev/null +++ b/src/args.zig @@ -0,0 +1,439 @@ +//! CLI argument types and parser for sql-pipe. 
+ +const std = @import("std"); +const format = @import("format.zig"); + +const InputFormat = format.InputFormat; +const OutputFormat = format.OutputFormat; + +pub const SqlPipeError = error{ + MissingQuery, + InvalidDelimiter, + IncompatibleFlags, + SilentVerboseConflict, + ColumnsWithQuery, + ValidateWithQuery, + ValidateWithColumns, + OutputWithValidate, + InvalidMaxRows, + InvalidInputFormat, + InvalidOutputFormat, + MissingXmlFlagValue, + InvalidXmlName, + OpenDbFailed, + EmptyInput, + EmptyColumnName, + NoColumns, + CreateTableFailed, + BeginTransactionFailed, + PrepareInsertFailed, + BindFailed, + StepFailed, + PrepareQueryFailed, + InvalidOutputPath, + OutputWithColumns, + SampleWithQuery, + SampleWithJson, + SampleWithColumns, + SampleWithValidate, + SampleWithOutput, + InvalidSampleCount, +}; + +pub const ParsedArgs = struct { + /// SQL query to execute against table `t`. + query: []const u8, + /// Infer column types from the first 100 buffered rows when true. + type_inference: bool, + /// CSV field delimiter — 1 to 8 bytes (default: ","). + delimiter: []const u8, + /// Emit column names as first output row when true (CSV output only). + header: bool, + /// Input format (default: csv). + input_format: InputFormat, + /// Output format (default: csv). + output_format: OutputFormat, + /// Abort with exit 1 when more than this many data rows are read; null = unlimited. + max_rows: ?usize, + /// Print "Loaded rows" to stderr after all rows are inserted when true. + /// When false, the message is still shown automatically when stderr is a TTY. + verbose: bool, + /// Suppress "Loaded rows" unconditionally. + silent: bool, + /// Write results to this file path instead of stdout; null = write to stdout. + output: ?[]const u8, + /// Root element name for XML output (default: "results"). + xml_root: []const u8, + /// Row element name for XML output (default: "row"). 
+ xml_row: []const u8, + /// Root element to navigate to for XML input; null = use actual document root. + xml_root_input: ?[]const u8, + /// Row tag filter for XML input; null = accept any direct child element as a row. + xml_row_input: ?[]const u8, +}; + +pub const ColumnsArgs = struct { + /// CSV field delimiter — 1 to 8 bytes (default: ","). + delimiter: []const u8, + /// Show inferred type alongside name when true. + verbose: bool, + /// Input format (default: csv). + input_format: InputFormat, + /// Root element to navigate to for XML input; null = use actual document root. + xml_root_input: ?[]const u8, + /// Row tag filter for XML input; null = accept any direct child element as a row. + xml_row_input: ?[]const u8, +}; + +pub const ValidateArgs = struct { + /// CSV field delimiter — 1 to 8 bytes (default: ","). + delimiter: []const u8, + /// Infer column types from the first 100 buffered rows when true. + type_inference: bool, + /// Input format (default: csv). + input_format: InputFormat, + /// Root element to navigate to for XML input; null = use actual document root. + xml_root_input: ?[]const u8, + /// Row tag filter for XML input; null = accept any direct child element as a row. + xml_row_input: ?[]const u8, +}; + +pub const SampleArgs = struct { + /// CSV field delimiter — 1 to 8 bytes (default: ","). + delimiter: []const u8, + /// Input format (default: csv). + input_format: InputFormat, + /// Number of sample rows to print (default: 10). + n: usize, + /// Infer column types from buffered rows when true; show all TEXT when false. + type_inference: bool, +}; + +pub const ArgsResult = union(enum) { + /// Normal execution: run the query. + parsed: ParsedArgs, + /// User requested --help / -h. + help, + /// User requested --version / -V. + version, + /// User requested --columns: list column names and exit. + columns: ColumnsArgs, + /// User requested --validate: parse input and print summary. 
+ validate: ValidateArgs, + /// User requested --sample: print schema + first n rows and exit. + sample: SampleArgs, +}; + +pub fn printUsage(writer: *std.Io.Writer) !void { + try writer.writeAll( + \\Usage: sql-pipe [OPTIONS] + \\ + \\Reads input from stdin, loads it into an in-memory SQLite table `t`, + \\runs , and prints results to stdout. + \\ + \\Options: + \\ -d, --delimiter Input field delimiter for CSV: 1–8 chars (default: ,) + \\ --tsv Alias for --delimiter '\t' + \\ -I, --input-format Input format: csv (default), tsv, json, ndjson, xml + \\ -O, --output-format Output format: csv (default), tsv, json, ndjson, xml + \\ --json Alias for --output-format json + \\ --no-type-inference Treat all columns as TEXT (CSV input only) + \\ -H, --header Print column names as the first output row (CSV/TSV output only) + \\ --max-rows Stop if more than data rows are read (exit 1) + \\ -v, --verbose Force row count to stderr (shown automatically on TTY) + \\ With --columns: show inferred type per column + \\ -s, --silent Suppress row count output unconditionally + \\ Cannot be combined with -v/--verbose + \\ --validate Parse the entire input and print a summary to stdout + \\ (OK: rows, columns ( , ...)) + \\ Exit 0 on success, exit 2 on parse error. No query required. + \\ Compatible with --delimiter, --tsv, --no-type-inference, -I. + \\ --columns List column names from input header (one per line) and exit + \\ Combine with -v/--verbose to include inferred types + \\ Cannot be combined with --output or a query argument + \\ --sample [] Print schema to stderr and first rows to stdout (default: 10) + \\ Schema lists column names and inferred types, prefixed with # + \\ Implies --header. Compatible with --delimiter and --tsv. + \\ Incompatible with --json and with a query argument. 
+ \\ --output Write results to file instead of stdout + \\ --xml-root Root element name for XML I/O (default: results) + \\ --xml-row Row element name for XML I/O (default: row) + \\ -h, --help Show this help message and exit + \\ -V, --version Show version and exit + \\ + \\Exit codes: + \\ 0 Success + \\ 1 Usage error (missing query, bad arguments) + \\ 2 Input parse error + \\ 3 SQL error + \\ + \\Examples: + \\ echo 'name,age\nAlice,30' | sql-pipe 'SELECT * FROM t' + \\ cat data.tsv | sql-pipe --tsv 'SELECT * FROM t' + \\ cat data.psv | sql-pipe -d '|' 'SELECT * FROM t' + \\ cat data.csv | sql-pipe 'SELECT region, SUM(revenue) FROM t GROUP BY region' + \\ cat data.csv | sql-pipe --output-format json 'SELECT * FROM t' + \\ cat data.json | sql-pipe --input-format json 'SELECT * FROM t' + \\ cat data.ndjson | sql-pipe -I ndjson -O ndjson 'SELECT name FROM t WHERE age > 18' + \\ cat data.csv | sql-pipe --sample 5 + \\ + ); +} + +pub fn parseDelimiter(value: []const u8) SqlPipeError![]const u8 { + if (std.mem.eql(u8, value, "\\t")) return "\t"; + if (value.len == 0) return error.InvalidDelimiter; + if (value.len > 8) return error.InvalidDelimiter; + return value; +} + +pub fn isValidXmlName(s: []const u8) bool { + if (s.len == 0) return false; + switch (s[0]) { + 'a'...'z', 'A'...'Z', '_', ':' => {}, + else => return false, + } + for (s[1..]) |ch| { + switch (ch) { + 'a'...'z', 'A'...'Z', '0'...'9', '-', '.', '_', ':' => {}, + else => return false, + } + } + return true; +} + +pub fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult { + var query: ?[]const u8 = null; + var type_inference = true; + var delimiter: []const u8 = ","; + var header = false; + var input_format: InputFormat = .csv; + var output_format: OutputFormat = .csv; + + var max_rows: ?usize = null; + var verbose = false; + var silent = false; + var list_columns = false; + var validate = false; + var output: ?[]const u8 = null; + var xml_root: []const u8 = "results"; + var xml_row: []const 
u8 = "row"; + var xml_root_input: ?[]const u8 = null; + var xml_row_input: ?[]const u8 = null; + var sample_mode = false; + var sample_n: usize = 10; + + // Loop invariant I: all args[1..i] have been processed; + // query holds the first non-flag argument seen, or null; + // type_inference reflects the presence of --no-type-inference; + // delimiter reflects -d/--delimiter/--tsv if present; + // header reflects the presence of --header/-H; + // output_format reflects the last --output-format/--json flag seen; + // input_format reflects the last --input-format flag seen; + // max_rows reflects the presence of --max-rows + // Bounding function: args.len - i + var i: usize = 1; + while (i < args.len) : (i += 1) { + const arg = args[i]; + if (std.mem.eql(u8, arg, "--help") or std.mem.eql(u8, arg, "-h")) { + return .help; + } else if (std.mem.eql(u8, arg, "--version") or std.mem.eql(u8, arg, "-V")) { + return .version; + } else if (std.mem.eql(u8, arg, "--tsv")) { + delimiter = "\t"; + } else if (std.mem.eql(u8, arg, "-d") or std.mem.eql(u8, arg, "--delimiter")) { + i += 1; + if (i >= args.len) return error.InvalidDelimiter; + delimiter = try parseDelimiter(args[i]); + } else if (std.mem.startsWith(u8, arg, "--delimiter=")) { + delimiter = try parseDelimiter(arg["--delimiter=".len..]); + } else if (std.mem.startsWith(u8, arg, "-d=")) { + delimiter = try parseDelimiter(arg["-d=".len..]); + } else if (std.mem.eql(u8, arg, "--no-type-inference")) { + type_inference = false; + } else if (std.mem.eql(u8, arg, "--header") or std.mem.eql(u8, arg, "-H")) { + header = true; + } else if (std.mem.eql(u8, arg, "--json")) { + output_format = .json; + } else if (std.mem.eql(u8, arg, "-I") or std.mem.eql(u8, arg, "--input-format")) { + i += 1; + if (i >= args.len) return error.InvalidInputFormat; + input_format = InputFormat.parse(args[i]) catch return error.InvalidInputFormat; + } else if (std.mem.startsWith(u8, arg, "--input-format=")) { + input_format = 
InputFormat.parse(arg["--input-format=".len..]) catch return error.InvalidInputFormat; + } else if (std.mem.startsWith(u8, arg, "-I=")) { + input_format = InputFormat.parse(arg["-I=".len..]) catch return error.InvalidInputFormat; + } else if (std.mem.eql(u8, arg, "-O") or std.mem.eql(u8, arg, "--output-format")) { + i += 1; + if (i >= args.len) return error.InvalidOutputFormat; + output_format = OutputFormat.parse(args[i]) catch return error.InvalidOutputFormat; + } else if (std.mem.startsWith(u8, arg, "--output-format=")) { + output_format = OutputFormat.parse(arg["--output-format=".len..]) catch return error.InvalidOutputFormat; + } else if (std.mem.startsWith(u8, arg, "-O=")) { + output_format = OutputFormat.parse(arg["-O=".len..]) catch return error.InvalidOutputFormat; + } else if (std.mem.eql(u8, arg, "--max-rows")) { + i += 1; + if (i >= args.len) return error.InvalidMaxRows; + max_rows = std.fmt.parseUnsigned(usize, args[i], 10) catch return error.InvalidMaxRows; + if (max_rows.? == 0) return error.InvalidMaxRows; + } else if (std.mem.startsWith(u8, arg, "--max-rows=")) { + max_rows = std.fmt.parseUnsigned(usize, arg["--max-rows=".len..], 10) catch return error.InvalidMaxRows; + if (max_rows.? 
== 0) return error.InvalidMaxRows; + } else if (std.mem.eql(u8, arg, "--verbose") or std.mem.eql(u8, arg, "-v")) { + verbose = true; + } else if (std.mem.eql(u8, arg, "--silent") or std.mem.eql(u8, arg, "-s")) { + silent = true; + } else if (std.mem.eql(u8, arg, "--columns")) { + list_columns = true; + } else if (std.mem.eql(u8, arg, "--validate")) { + validate = true; + } else if (std.mem.eql(u8, arg, "--sample")) { + sample_mode = true; + // Peek at next arg: if it is a positive integer, consume it as the sample count + if (i + 1 < args.len) { + const next = args[i + 1]; + if (next.len > 0 and next[0] != '-') { + if (std.fmt.parseUnsigned(usize, next, 10)) |n| { + if (n == 0) return error.InvalidSampleCount; + sample_n = n; + i += 1; + } else |_| { + // Not a number — keep default (10) + } + } + } + } else if (std.mem.startsWith(u8, arg, "--sample=")) { + const val = arg["--sample=".len..]; + const n = std.fmt.parseUnsigned(usize, val, 10) catch return error.InvalidSampleCount; + if (n == 0) return error.InvalidSampleCount; + sample_n = n; + sample_mode = true; + } else if (std.mem.eql(u8, arg, "--output")) { + i += 1; + if (i >= args.len) return error.InvalidOutputPath; + const trimmed = std.mem.trim(u8, args[i], " \t"); + if (trimmed.len == 0) return error.InvalidOutputPath; + output = trimmed; + } else if (std.mem.startsWith(u8, arg, "--output=")) { + const trimmed = std.mem.trim(u8, arg["--output=".len..], " \t"); + if (trimmed.len == 0) return error.InvalidOutputPath; + output = trimmed; + } else if (std.mem.eql(u8, arg, "--xml-root")) { + i += 1; + if (i >= args.len) return error.MissingXmlFlagValue; + xml_root = args[i]; + xml_root_input = args[i]; + } else if (std.mem.startsWith(u8, arg, "--xml-root=")) { + xml_root = arg["--xml-root=".len..]; + xml_root_input = arg["--xml-root=".len..]; + } else if (std.mem.eql(u8, arg, "--xml-row")) { + i += 1; + if (i >= args.len) return error.MissingXmlFlagValue; + xml_row = args[i]; + xml_row_input = args[i]; + } 
else if (std.mem.startsWith(u8, arg, "--xml-row=")) { + xml_row = arg["--xml-row=".len..]; + xml_row_input = arg["--xml-row=".len..]; + } else { + if (query == null) query = arg; + } + } + + // Non-CSV/TSV output format is mutually exclusive with --header + if (output_format != .csv and output_format != .tsv and header) + return error.IncompatibleFlags; + + // --output is mutually exclusive with --columns (--columns always writes to stdout) + if (output != null and list_columns) + return error.OutputWithColumns; + + // --output is mutually exclusive with --validate (--validate always writes to stdout) + if (output != null and validate) + return error.OutputWithValidate; + + // --output is mutually exclusive with --sample (--sample always writes to stdout) + if (output != null and sample_mode) + return error.SampleWithOutput; + + // --validate is mutually exclusive with --columns + if (validate and list_columns) + return error.ValidateWithColumns; + + // --columns is mutually exclusive with a query argument + if (list_columns and query != null) + return error.ColumnsWithQuery; + + // --validate is mutually exclusive with a query argument + if (validate and query != null) + return error.ValidateWithQuery; + + // --sample is mutually exclusive with a query argument + if (sample_mode and query != null) + return error.SampleWithQuery; + + // --sample is mutually exclusive with --json / json output format + if (sample_mode and (output_format == .json or output_format == .ndjson)) + return error.SampleWithJson; + + // --sample is mutually exclusive with --columns + if (sample_mode and list_columns) + return error.SampleWithColumns; + + // --sample is mutually exclusive with --validate + if (sample_mode and validate) + return error.SampleWithValidate; + + // --silent and --verbose are mutually exclusive + if (silent and verbose) + return error.SilentVerboseConflict; + + // --xml-root and --xml-row must be valid XML element names + if (!isValidXmlName(xml_root) or 
!isValidXmlName(xml_row)) + return error.InvalidXmlName; + + // --columns mode: list headers and exit + if (list_columns) + return .{ .columns = ColumnsArgs{ + .delimiter = delimiter, + .verbose = verbose, + .input_format = input_format, + .xml_root_input = xml_root_input, + .xml_row_input = xml_row_input, + } }; + + // --validate mode: parse CSV and print summary + if (validate) + return .{ .validate = ValidateArgs{ + .delimiter = delimiter, + .type_inference = type_inference, + .input_format = input_format, + .xml_root_input = xml_root_input, + .xml_row_input = xml_row_input, + } }; + + // --sample mode: print schema + first n rows and exit + if (sample_mode) + return .{ .sample = SampleArgs{ + .delimiter = delimiter, + .input_format = input_format, + .n = sample_n, + .type_inference = type_inference, + } }; + + return .{ .parsed = ParsedArgs{ + .query = query orelse return error.MissingQuery, + .type_inference = type_inference, + .delimiter = delimiter, + .header = header, + .input_format = input_format, + .output_format = output_format, + .max_rows = max_rows, + .verbose = verbose, + .silent = silent, + .output = output, + .xml_root = xml_root, + .xml_row = xml_row, + .xml_root_input = xml_root_input, + .xml_row_input = xml_row_input, + } }; +} diff --git a/src/main.zig b/src/main.zig index cf6aba2..839b32f 100644 --- a/src/main.zig +++ b/src/main.zig @@ -5,6 +5,7 @@ const json = @import("json.zig"); const xml = @import("xml.zig"); const format = @import("format.zig"); const build_options = @import("build_options"); +const args_mod = @import("args.zig"); const VERSION: []const u8 = build_options.version; @@ -14,41 +15,14 @@ const sqlite_static: c.sqlite3_destructor_type = null; /// SQLITE_TRANSIENT sentinel: tells sqlite3_bind_text to copy the string /// immediately (safe for short-lived source buffers, e.g. JSON arena data). 
-// ─── Error types ───────────────────────────────────── - -const SqlPipeError = error{ - MissingQuery, - InvalidDelimiter, - IncompatibleFlags, - SilentVerboseConflict, - ColumnsWithQuery, - ValidateWithQuery, - ValidateWithColumns, - OutputWithValidate, - InvalidMaxRows, - InvalidInputFormat, - InvalidOutputFormat, - MissingXmlFlagValue, - InvalidXmlName, - OpenDbFailed, - EmptyInput, - EmptyColumnName, - NoColumns, - CreateTableFailed, - BeginTransactionFailed, - PrepareInsertFailed, - BindFailed, - StepFailed, - PrepareQueryFailed, - InvalidOutputPath, - OutputWithColumns, - SampleWithQuery, - SampleWithJson, - SampleWithColumns, - SampleWithValidate, - SampleWithOutput, - InvalidSampleCount, -}; +const SqlPipeError = args_mod.SqlPipeError; +const ParsedArgs = args_mod.ParsedArgs; +const ColumnsArgs = args_mod.ColumnsArgs; +const ValidateArgs = args_mod.ValidateArgs; +const SampleArgs = args_mod.SampleArgs; +const ArgsResult = args_mod.ArgsResult; +const parseArgs = args_mod.parseArgs; +const printUsage = args_mod.printUsage; // ─── Column type inference ──────────────────────────── @@ -79,432 +53,6 @@ const InputFormat = format.InputFormat; /// Supported output formats (canonical definition lives in format.zig). const OutputFormat = format.OutputFormat; -/// Parsed command-line arguments. -const ParsedArgs = struct { - /// SQL query to execute against table `t`. - query: []const u8, - /// Infer column types from the first 100 buffered rows when true. - type_inference: bool, - /// CSV field delimiter — 1 to 8 bytes (default: ","). - delimiter: []const u8, - /// Emit column names as first output row when true (CSV output only). - header: bool, - /// Input format (default: csv). - input_format: InputFormat, - /// Output format (default: csv). - output_format: OutputFormat, - /// Abort with exit 1 when more than this many data rows are read; null = unlimited. - max_rows: ?usize, - /// Print "Loaded rows" to stderr after all rows are inserted when true. 
- /// When false, the message is still shown automatically when stderr is a TTY. - verbose: bool, - /// Suppress "Loaded rows" unconditionally. - silent: bool, - /// Write results to this file path instead of stdout; null = write to stdout. - output: ?[]const u8, - /// Root element name for XML output (default: "results"). - xml_root: []const u8, - /// Row element name for XML output (default: "row"). - xml_row: []const u8, - /// Root element to navigate to for XML input; null = use actual document root. - xml_root_input: ?[]const u8, - /// Row tag filter for XML input; null = accept any direct child element as a row. - xml_row_input: ?[]const u8, -}; - -/// Arguments for `--columns` mode. -const ColumnsArgs = struct { - /// CSV field delimiter — 1 to 8 bytes (default: ","). - delimiter: []const u8, - /// Show inferred type alongside name when true. - verbose: bool, - /// Input format (default: csv). - input_format: InputFormat, - /// Root element to navigate to for XML input; null = use actual document root. - xml_root_input: ?[]const u8, - /// Row tag filter for XML input; null = accept any direct child element as a row. - xml_row_input: ?[]const u8, -}; - -/// Arguments for `--validate` mode. -const ValidateArgs = struct { - /// CSV field delimiter — 1 to 8 bytes (default: ","). - delimiter: []const u8, - /// Infer column types from the first 100 buffered rows when true. - type_inference: bool, - /// Input format (default: csv). - input_format: InputFormat, - /// Root element to navigate to for XML input; null = use actual document root. - xml_root_input: ?[]const u8, - /// Row tag filter for XML input; null = accept any direct child element as a row. - xml_row_input: ?[]const u8, -}; - -/// Arguments for `--sample` mode. -const SampleArgs = struct { - /// CSV field delimiter — 1 to 8 bytes (default: ","). - delimiter: []const u8, - /// Input format (default: csv). - input_format: InputFormat, - /// Number of sample rows to print (default: 10). 
- n: usize, - /// Infer column types from buffered rows when true; show all TEXT when false. - type_inference: bool, -}; - -/// Result of argument parsing — either parsed arguments or a special action. -const ArgsResult = union(enum) { - /// Normal execution: run the query. - parsed: ParsedArgs, - /// User requested --help / -h. - help, - /// User requested --version / -V. - version, - /// User requested --columns: list column names and exit. - columns: ColumnsArgs, - /// User requested --validate: parse input and print summary. - validate: ValidateArgs, - /// User requested --sample: print schema + first n rows and exit. - sample: SampleArgs, -}; - -// ─── Extracted functions ────────────────────────────── - -/// printUsage(writer) → void -/// Pre: writer is a valid stderr writer -/// Post: usage text has been written to writer -fn printUsage(writer: *std.Io.Writer) !void { - try writer.writeAll( - \\Usage: sql-pipe [OPTIONS] - \\ - \\Reads input from stdin, loads it into an in-memory SQLite table `t`, - \\runs , and prints results to stdout. - \\ - \\Options: - \\ -d, --delimiter Input field delimiter for CSV: 1–8 chars (default: ,) - \\ --tsv Alias for --delimiter '\t' - \\ -I, --input-format Input format: csv (default), tsv, json, ndjson, xml - \\ -O, --output-format Output format: csv (default), tsv, json, ndjson, xml - \\ --json Alias for --output-format json - \\ --no-type-inference Treat all columns as TEXT (CSV input only) - \\ -H, --header Print column names as the first output row (CSV/TSV output only) - \\ --max-rows Stop if more than data rows are read (exit 1) - \\ -v, --verbose Force row count to stderr (shown automatically on TTY) - \\ With --columns: show inferred type per column - \\ -s, --silent Suppress row count output unconditionally - \\ Cannot be combined with -v/--verbose - \\ --validate Parse the entire input and print a summary to stdout - \\ (OK: rows, columns ( , ...)) - \\ Exit 0 on success, exit 2 on parse error. No query required. 
- \\ Compatible with --delimiter, --tsv, --no-type-inference, -I. - \\ --columns List column names from input header (one per line) and exit - \\ Combine with -v/--verbose to include inferred types - \\ Cannot be combined with --output or a query argument - \\ --sample [] Print schema to stderr and first rows to stdout (default: 10) - \\ Schema lists column names and inferred types, prefixed with # - \\ Implies --header. Compatible with --delimiter and --tsv. - \\ Incompatible with --json and with a query argument. - \\ --output Write results to file instead of stdout - \\ --xml-root Root element name for XML I/O (default: results) - \\ --xml-row Row element name for XML I/O (default: row) - \\ -h, --help Show this help message and exit - \\ -V, --version Show version and exit - \\ - \\Exit codes: - \\ 0 Success - \\ 1 Usage error (missing query, bad arguments) - \\ 2 Input parse error - \\ 3 SQL error - \\ - \\Examples: - \\ echo 'name,age\nAlice,30' | sql-pipe 'SELECT * FROM t' - \\ cat data.tsv | sql-pipe --tsv 'SELECT * FROM t' - \\ cat data.psv | sql-pipe -d '|' 'SELECT * FROM t' - \\ cat data.csv | sql-pipe 'SELECT region, SUM(revenue) FROM t GROUP BY region' - \\ cat data.csv | sql-pipe --output-format json 'SELECT * FROM t' - \\ cat data.json | sql-pipe --input-format json 'SELECT * FROM t' - \\ cat data.ndjson | sql-pipe -I ndjson -O ndjson 'SELECT name FROM t WHERE age > 18' - \\ cat data.csv | sql-pipe --sample 5 - \\ - ); -} - -/// parseDelimiter(value) → []const u8 -/// Pre: value is the delimiter token provided by the user -/// Post: result is a 1–8 byte delimiter string, or "\t" when value = "\\t" -/// error.InvalidDelimiter when value is empty or longer than 8 bytes -fn parseDelimiter(value: []const u8) SqlPipeError![]const u8 { - if (std.mem.eql(u8, value, "\\t")) return "\t"; - if (value.len == 0) return error.InvalidDelimiter; - if (value.len > 8) return error.InvalidDelimiter; - return value; -} - -/// isValidXmlName(s) → bool -/// -/// Returns 
true iff s is a valid XML Name: -/// NameStartChar: letter, '_', ':' -/// NameChar: NameStartChar | digit | '-' | '.' -fn isValidXmlName(s: []const u8) bool { - if (s.len == 0) return false; - switch (s[0]) { - 'a'...'z', 'A'...'Z', '_', ':' => {}, - else => return false, - } - for (s[1..]) |ch| { - switch (ch) { - 'a'...'z', 'A'...'Z', '0'...'9', '-', '.', '_', ':' => {}, - else => return false, - } - } - return true; -} - -/// parseArgs(args) → ArgsResult -/// Pre: args is the full process argument slice; args[0] is the program name -/// Post: result.parsed.query is the first non-flag argument -/// result.parsed.type_inference = false when "--no-type-inference" is present -/// result.parsed.output_format = .json when "--json" or "--output-format json" is present -/// result = .help when --help or -h is present -/// result = .version when --version or -V is present -/// error.MissingQuery when no non-flag argument is found -/// error.IncompatibleFlags when a non-CSV/TSV output format is combined with --header -fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult { - var query: ?[]const u8 = null; - var type_inference = true; - var delimiter: []const u8 = ","; - var header = false; - var input_format: InputFormat = .csv; - var output_format: OutputFormat = .csv; - - var max_rows: ?usize = null; - var verbose = false; - var silent = false; - var list_columns = false; - var validate = false; - var output: ?[]const u8 = null; - var xml_root: []const u8 = "results"; - var xml_row: []const u8 = "row"; - var xml_root_input: ?[]const u8 = null; - var xml_row_input: ?[]const u8 = null; - var sample_mode = false; - var sample_n: usize = 10; - - // Loop invariant I: all args[1..i] have been processed; - // query holds the first non-flag argument seen, or null; - // type_inference reflects the presence of --no-type-inference; - // delimiter reflects -d/--delimiter/--tsv if present; - // header reflects the presence of --header/-H; - // output_format reflects the 
last --output-format/--json flag seen; - // input_format reflects the last --input-format flag seen; - // max_rows reflects the presence of --max-rows - // Bounding function: args.len - i - var i: usize = 1; - while (i < args.len) : (i += 1) { - const arg = args[i]; - if (std.mem.eql(u8, arg, "--help") or std.mem.eql(u8, arg, "-h")) { - return .help; - } else if (std.mem.eql(u8, arg, "--version") or std.mem.eql(u8, arg, "-V")) { - return .version; - } else if (std.mem.eql(u8, arg, "--tsv")) { - delimiter = "\t"; - } else if (std.mem.eql(u8, arg, "-d") or std.mem.eql(u8, arg, "--delimiter")) { - i += 1; - if (i >= args.len) return error.InvalidDelimiter; - delimiter = try parseDelimiter(args[i]); - } else if (std.mem.startsWith(u8, arg, "--delimiter=")) { - delimiter = try parseDelimiter(arg["--delimiter=".len..]); - } else if (std.mem.startsWith(u8, arg, "-d=")) { - delimiter = try parseDelimiter(arg["-d=".len..]); - } else if (std.mem.eql(u8, arg, "--no-type-inference")) { - type_inference = false; - } else if (std.mem.eql(u8, arg, "--header") or std.mem.eql(u8, arg, "-H")) { - header = true; - } else if (std.mem.eql(u8, arg, "--json")) { - output_format = .json; - } else if (std.mem.eql(u8, arg, "-I") or std.mem.eql(u8, arg, "--input-format")) { - i += 1; - if (i >= args.len) return error.InvalidInputFormat; - input_format = InputFormat.parse(args[i]) catch return error.InvalidInputFormat; - } else if (std.mem.startsWith(u8, arg, "--input-format=")) { - input_format = InputFormat.parse(arg["--input-format=".len..]) catch return error.InvalidInputFormat; - } else if (std.mem.startsWith(u8, arg, "-I=")) { - input_format = InputFormat.parse(arg["-I=".len..]) catch return error.InvalidInputFormat; - } else if (std.mem.eql(u8, arg, "-O") or std.mem.eql(u8, arg, "--output-format")) { - i += 1; - if (i >= args.len) return error.InvalidOutputFormat; - output_format = OutputFormat.parse(args[i]) catch return error.InvalidOutputFormat; - } else if (std.mem.startsWith(u8, 
arg, "--output-format=")) { - output_format = OutputFormat.parse(arg["--output-format=".len..]) catch return error.InvalidOutputFormat; - } else if (std.mem.startsWith(u8, arg, "-O=")) { - output_format = OutputFormat.parse(arg["-O=".len..]) catch return error.InvalidOutputFormat; - } else if (std.mem.eql(u8, arg, "--max-rows")) { - i += 1; - if (i >= args.len) return error.InvalidMaxRows; - max_rows = std.fmt.parseUnsigned(usize, args[i], 10) catch return error.InvalidMaxRows; - if (max_rows.? == 0) return error.InvalidMaxRows; - } else if (std.mem.startsWith(u8, arg, "--max-rows=")) { - max_rows = std.fmt.parseUnsigned(usize, arg["--max-rows=".len..], 10) catch return error.InvalidMaxRows; - if (max_rows.? == 0) return error.InvalidMaxRows; - } else if (std.mem.eql(u8, arg, "--verbose") or std.mem.eql(u8, arg, "-v")) { - verbose = true; - } else if (std.mem.eql(u8, arg, "--silent") or std.mem.eql(u8, arg, "-s")) { - silent = true; - } else if (std.mem.eql(u8, arg, "--columns")) { - list_columns = true; - } else if (std.mem.eql(u8, arg, "--validate")) { - validate = true; - } else if (std.mem.eql(u8, arg, "--sample")) { - sample_mode = true; - // Peek at next arg: if it is a positive integer, consume it as the sample count - if (i + 1 < args.len) { - const next = args[i + 1]; - if (next.len > 0 and next[0] != '-') { - if (std.fmt.parseUnsigned(usize, next, 10)) |n| { - if (n == 0) return error.InvalidSampleCount; - sample_n = n; - i += 1; - } else |_| { - // Not a number — keep default (10) - } - } - } - } else if (std.mem.startsWith(u8, arg, "--sample=")) { - const val = arg["--sample=".len..]; - const n = std.fmt.parseUnsigned(usize, val, 10) catch return error.InvalidSampleCount; - if (n == 0) return error.InvalidSampleCount; - sample_n = n; - sample_mode = true; - } else if (std.mem.eql(u8, arg, "--output")) { - i += 1; - if (i >= args.len) return error.InvalidOutputPath; - const trimmed = std.mem.trim(u8, args[i], " \t"); - if (trimmed.len == 0) return 
error.InvalidOutputPath; - output = trimmed; - } else if (std.mem.startsWith(u8, arg, "--output=")) { - const trimmed = std.mem.trim(u8, arg["--output=".len..], " \t"); - if (trimmed.len == 0) return error.InvalidOutputPath; - output = trimmed; - } else if (std.mem.eql(u8, arg, "--xml-root")) { - i += 1; - if (i >= args.len) return error.MissingXmlFlagValue; - xml_root = args[i]; - xml_root_input = args[i]; - } else if (std.mem.startsWith(u8, arg, "--xml-root=")) { - xml_root = arg["--xml-root=".len..]; - xml_root_input = arg["--xml-root=".len..]; - } else if (std.mem.eql(u8, arg, "--xml-row")) { - i += 1; - if (i >= args.len) return error.MissingXmlFlagValue; - xml_row = args[i]; - xml_row_input = args[i]; - } else if (std.mem.startsWith(u8, arg, "--xml-row=")) { - xml_row = arg["--xml-row=".len..]; - xml_row_input = arg["--xml-row=".len..]; - } else { - if (query == null) query = arg; - } - } - - // Non-CSV/TSV output format is mutually exclusive with --header - if (output_format != .csv and output_format != .tsv and header) - return error.IncompatibleFlags; - - // --output is mutually exclusive with --columns (--columns always writes to stdout) - if (output != null and list_columns) - return error.OutputWithColumns; - - // --output is mutually exclusive with --validate (--validate always writes to stdout) - if (output != null and validate) - return error.OutputWithValidate; - - // --output is mutually exclusive with --sample (--sample always writes to stdout) - if (output != null and sample_mode) - return error.SampleWithOutput; - - // --validate is mutually exclusive with --columns - if (validate and list_columns) - return error.ValidateWithColumns; - - // --columns is mutually exclusive with a query argument - if (list_columns and query != null) - return error.ColumnsWithQuery; - - // --validate is mutually exclusive with a query argument - if (validate and query != null) - return error.ValidateWithQuery; - - // --sample is mutually exclusive with a query 
argument - if (sample_mode and query != null) - return error.SampleWithQuery; - - // --sample is mutually exclusive with --json / json output format - if (sample_mode and (output_format == .json or output_format == .ndjson)) - return error.SampleWithJson; - - // --sample is mutually exclusive with --columns - if (sample_mode and list_columns) - return error.SampleWithColumns; - - // --sample is mutually exclusive with --validate - if (sample_mode and validate) - return error.SampleWithValidate; - - // --silent and --verbose are mutually exclusive - if (silent and verbose) - return error.SilentVerboseConflict; - - // --xml-root and --xml-row must be valid XML element names - if (!isValidXmlName(xml_root) or !isValidXmlName(xml_row)) - return error.InvalidXmlName; - - // --columns mode: list headers and exit - if (list_columns) - return .{ .columns = ColumnsArgs{ - .delimiter = delimiter, - .verbose = verbose, - .input_format = input_format, - .xml_root_input = xml_root_input, - .xml_row_input = xml_row_input, - } }; - - // --validate mode: parse CSV and print summary - if (validate) - return .{ .validate = ValidateArgs{ - .delimiter = delimiter, - .type_inference = type_inference, - .input_format = input_format, - .xml_root_input = xml_root_input, - .xml_row_input = xml_row_input, - } }; - - // --sample mode: print schema + first n rows and exit - if (sample_mode) - return .{ .sample = SampleArgs{ - .delimiter = delimiter, - .input_format = input_format, - .n = sample_n, - .type_inference = type_inference, - } }; - - return .{ .parsed = ParsedArgs{ - .query = query orelse return error.MissingQuery, - .type_inference = type_inference, - .delimiter = delimiter, - .header = header, - .input_format = input_format, - .output_format = output_format, - .max_rows = max_rows, - .verbose = verbose, - .silent = silent, - .output = output, - .xml_root = xml_root, - .xml_row = xml_row, - .xml_root_input = xml_root_input, - .xml_row_input = xml_row_input, - } }; -} - /// openDb() 
→ *sqlite3 /// Pre: — /// Post: result is an open, empty in-memory SQLite database handle @@ -1210,15 +758,15 @@ fn loadCsvInput( return rows_inserted; } -/// runColumns(args, allocator, io, stderr_writer, stdout_writer) → void +/// runColumns(allocator, io, args, stderr_writer, stdout_writer) → void /// Pre: args is valid; allocator and writers are valid /// Post: column names from the input header (CSV/JSON/NDJSON) are written to stdout, /// one per line; when args.verbose is true each line has format " " /// (CSV only — JSON/NDJSON always show TEXT); exits 0 on success, 2 on parse error fn runColumns( - args: ColumnsArgs, allocator: std.mem.Allocator, io: std.Io, + args: ColumnsArgs, stderr_writer: *std.Io.Writer, stdout_writer: *std.Io.Writer, ) void { @@ -1394,15 +942,15 @@ fn runColumns( } } -/// runValidate(args, allocator, io, stderr_writer, stdout_writer) → void +/// runValidate(allocator, io, args, stderr_writer, stdout_writer) → void /// Pre: args is valid; allocator and writers are valid /// Post: the entire input has been parsed (CSV, TSV, JSON, or NDJSON); /// on success prints "OK: rows, columns ( , ...)" to stdout. /// On parse error, prints the error message to stderr and exits 2. fn runValidate( - args: ValidateArgs, allocator: std.mem.Allocator, io: std.Io, + args: ValidateArgs, stderr_writer: *std.Io.Writer, stdout_writer: *std.Io.Writer, ) void { @@ -1694,16 +1242,16 @@ fn runValidate( } } -/// runSample(args, allocator, io, stderr_writer, stdout_writer) → void +/// runSample(allocator, io, args, stderr_writer, stdout_writer) → void /// Pre: args is valid; allocator and writers are valid; input_format is csv or tsv /// Post: a schema comment block is written to stderr (column names + inferred types, /// or all TEXT if args.type_inference is false, each line prefixed with "#") and /// a header row + first args.n data rows are written to stdout as delimited text. /// Exits 2 on parse error, 1 on stdout write error. No query required. 
fn runSample( - args: SampleArgs, allocator: std.mem.Allocator, io: std.Io, + args: SampleArgs, stderr_writer: *std.Io.Writer, stdout_writer: *std.Io.Writer, ) void { @@ -1841,16 +1389,16 @@ fn runSample( } } -/// run(parsed, allocator, io, stderr_writer, stdout_writer) → void +/// run(allocator, io, parsed, stderr_writer, stdout_writer) → void /// Pre: parsed contains a valid query; allocator and writers are valid /// Post: input from stdin has been loaded (dispatched on parsed.input_format), /// query executed, results written to stdout in parsed.output_format /// On error, an "error: ..." message is written to stderr and process /// exits with the appropriate ExitCode (1, 2, or 3) fn run( - parsed: ParsedArgs, allocator: std.mem.Allocator, io: std.Io, + parsed: ParsedArgs, stderr_writer: *std.Io.Writer, stdout_writer: *std.Io.Writer, ) void { @@ -2093,7 +1641,7 @@ pub fn main(init: std.process.Init.Minimal) void { std.process.exit(@intFromEnum(ExitCode.success)); }, .columns => |col_args| { - runColumns(col_args, allocator, io.io(), stderr_writer, stdout_writer); + runColumns(allocator, io.io(), col_args, stderr_writer, stdout_writer); stdout_file_writer.flush() catch |err| { std.log.err("failed to flush stdout: {}", .{err}); }; @@ -2102,7 +1650,7 @@ pub fn main(init: std.process.Init.Minimal) void { }; }, .validate => |val_args| { - runValidate(val_args, allocator, io.io(), stderr_writer, stdout_writer); + runValidate(allocator, io.io(), val_args, stderr_writer, stdout_writer); stdout_file_writer.flush() catch |err| { std.log.err("failed to flush stdout: {}", .{err}); }; @@ -2111,7 +1659,7 @@ pub fn main(init: std.process.Init.Minimal) void { }; }, .sample => |sample_args| { - runSample(sample_args, allocator, io.io(), stderr_writer, stdout_writer); + runSample(allocator, io.io(), sample_args, stderr_writer, stdout_writer); stdout_file_writer.flush() catch |err| { std.log.err("failed to flush stdout: {}", .{err}); }; @@ -2131,12 +1679,12 @@ pub fn main(init: 
std.process.Init.Minimal) void { defer std.Io.File.close(output_file, io.io()); var output_buf: [4096]u8 = undefined; var output_file_writer = std.Io.File.writer(output_file, io.io(), &output_buf); - run(parsed, allocator, io.io(), stderr_writer, &output_file_writer.interface); + run(allocator, io.io(), parsed, stderr_writer, &output_file_writer.interface); output_file_writer.flush() catch |err| { std.log.err("failed to flush output file: {}", .{err}); }; } else { - run(parsed, allocator, io.io(), stderr_writer, stdout_writer); + run(allocator, io.io(), parsed, stderr_writer, stdout_writer); stdout_file_writer.flush() catch |err| { std.log.err("failed to flush stdout: {}", .{err}); }; From 311ce1195e7e7482fcd82cd0817aa485ea5c08e6 Mon Sep 17 00:00:00 2001 From: "Victor M. Varela" Date: Fri, 8 May 2026 12:59:09 +0200 Subject: [PATCH 3/6] refactor: consolidate SQLite helpers and ColumnType into sqlite.zig --- src/main.zig | 228 +++---------------------------------------------- src/sqlite.zig | 169 ++++++++++++++++++++++++++++++++++++ 2 files changed, 180 insertions(+), 217 deletions(-) diff --git a/src/main.zig b/src/main.zig index 839b32f..9834a21 100644 --- a/src/main.zig +++ b/src/main.zig @@ -6,15 +6,12 @@ const xml = @import("xml.zig"); const format = @import("format.zig"); const build_options = @import("build_options"); const args_mod = @import("args.zig"); +const sqlite_mod = @import("sqlite.zig"); +const ColumnType = sqlite_mod.ColumnType; +const sqlite_static = sqlite_mod.sqlite_static; const VERSION: []const u8 = build_options.version; -/// SQLITE_STATIC sentinel: tells sqlite3_bind_text that the string is -/// caller-managed and SQLite must not attempt to free it. -const sqlite_static: c.sqlite3_destructor_type = null; - -/// SQLITE_TRANSIENT sentinel: tells sqlite3_bind_text to copy the string -/// immediately (safe for short-lived source buffers, e.g. JSON arena data). 
const SqlPipeError = args_mod.SqlPipeError; const ParsedArgs = args_mod.ParsedArgs; const ColumnsArgs = args_mod.ColumnsArgs; @@ -26,9 +23,6 @@ const printUsage = args_mod.printUsage; // ─── Column type inference ──────────────────────────── -/// Inferred SQLite affinity for a CSV column. -const ColumnType = enum { TEXT, INTEGER, REAL }; - /// Number of rows buffered from stdin to infer column types. const inference_buffer_size: usize = 100; @@ -53,16 +47,6 @@ const InputFormat = format.InputFormat; /// Supported output formats (canonical definition lives in format.zig). const OutputFormat = format.OutputFormat; -/// openDb() → *sqlite3 -/// Pre: — -/// Post: result is an open, empty in-memory SQLite database handle -/// error.OpenDbFailed when sqlite3_open returns non-SQLITE_OK -fn openDb() SqlPipeError!*c.sqlite3 { - var db: ?*c.sqlite3 = null; - if (c.sqlite3_open(":memory:", &db) != c.SQLITE_OK) return error.OpenDbFailed; - return db.?; -} - /// stripQuotes(raw) → []const u8 /// Pre: raw is a valid UTF-8 slice /// Post: if raw = '"' ++ inner ++ '"' => result = inner @@ -234,80 +218,6 @@ fn parseHeader( return cols.toOwnedSlice(allocator); } -/// createTable(db, cols, types, allocator) → void -/// Pre: db is an open SQLite handle -/// cols.len > 0 -/// types.len = cols.len -/// allocator is valid -/// Post: table `t` exists in db with cols.len columns named by cols; -/// each column's SQL type reflects its ColumnType value -/// (INTEGER / REAL / TEXT with correct SQLite affinity) -/// column identifiers are double-quote escaped per SQL syntax -/// error.CreateTableFailed when sqlite3_exec returns non-SQLITE_OK -fn createTable( - allocator: std.mem.Allocator, - db: *c.sqlite3, - cols: []const []const u8, - types: []const ColumnType, -) (SqlPipeError || std.mem.Allocator.Error)!void { - var sql: std.ArrayList(u8) = .empty; - defer sql.deinit(allocator); - - try sql.appendSlice(allocator, "CREATE TABLE t ("); - // Loop invariant I: sql = "CREATE TABLE t (" ++ 
columns[0..i] joined by ", " - // Bounding function: cols.len - i - for (cols, 0..) |col, i| { - if (i > 0) try sql.appendSlice(allocator, ", "); - try sql.append(allocator, '"'); - // Escape embedded double-quotes by doubling them (SQL identifier rule) - for (col) |ch| { - if (ch == '"') try sql.append(allocator, '"'); - try sql.append(allocator, ch); - } - try sql.append(allocator, '"'); - try sql.appendSlice(allocator, switch (types[i]) { - .INTEGER => " INTEGER", - .REAL => " REAL", - .TEXT => " TEXT", - }); - } - try sql.appendSlice(allocator, ")"); - try sql.append(allocator, 0); // null-terminate for the C API - - var errmsg: [*c]u8 = null; - if (c.sqlite3_exec(db, sql.items.ptr, null, null, &errmsg) != c.SQLITE_OK) { - if (errmsg != null) c.sqlite3_free(errmsg); - return error.CreateTableFailed; - } -} - -/// prepareInsert(db, n, allocator) → *sqlite3_stmt -/// Pre: db is open, table `t` exists with n TEXT columns, n > 0 -/// allocator is valid -/// Post: result is a prepared `INSERT INTO t VALUES (?,…,?)` with n parameters -/// error.PrepareInsertFailed when sqlite3_prepare_v2 returns non-SQLITE_OK -fn prepareInsert( - allocator: std.mem.Allocator, - db: *c.sqlite3, - n: usize, -) (SqlPipeError || std.mem.Allocator.Error)!*c.sqlite3_stmt { - var sql: std.ArrayList(u8) = .empty; - defer sql.deinit(allocator); - - try sql.appendSlice(allocator, "INSERT INTO t VALUES ("); - for (0..n) |i| { - if (i > 0) try sql.append(allocator, ','); - try sql.append(allocator, '?'); - } - try sql.appendSlice(allocator, ")"); - try sql.append(allocator, 0); - - var stmt: ?*c.sqlite3_stmt = null; - if (c.sqlite3_prepare_v2(db, sql.items.ptr, -1, &stmt, null) != c.SQLITE_OK) - return error.PrepareInsertFailed; - return stmt.?; -} - /// insertRowTyped(stmt, db, row, types, param_count) → void /// Pre: stmt is a prepared INSERT with param_count parameters, freshly reset /// row is a non-empty CSV record (slice of field slices) @@ -429,102 +339,6 @@ fn execQuery( try 
out_writer.end(writer); } -// ─── SQL error context helpers ──────────────────────── - -/// Compute the Levenshtein edit distance between two strings. -/// Uses two-row DP over at most max_len characters per string. -fn levenshteinDistance(a: []const u8, b: []const u8) usize { - const max_len = 128; - var prev: [max_len + 1]usize = undefined; - var curr: [max_len + 1]usize = undefined; - const a_len = @min(a.len, max_len); - const b_len = @min(b.len, max_len); - - for (0..b_len + 1) |j| prev[j] = j; - for (0..a_len) |i| { - curr[0] = i + 1; - for (0..b_len) |j| { - const cost: usize = if (a[i] == b[j]) 0 else 1; - curr[j + 1] = @min(curr[j] + 1, @min(prev[j + 1] + 1, prev[j] + cost)); - } - @memcpy(prev[0 .. b_len + 1], curr[0 .. b_len + 1]); - } - return prev[b_len]; -} - -/// Return column names of table `t` via PRAGMA table_info. -/// Caller owns the returned slice; free each element and the slice with allocator. -/// Returns empty slice on PRAGMA failure. -fn getTableColumns(allocator: std.mem.Allocator, db: *c.sqlite3) ![][]const u8 { - var stmt: ?*c.sqlite3_stmt = null; - if (c.sqlite3_prepare_v2(db, "PRAGMA table_info(t)", -1, &stmt, null) != c.SQLITE_OK) - return &.{}; - defer _ = c.sqlite3_finalize(stmt); - - var cols = std.ArrayList([]const u8).empty; - errdefer { - for (cols.items) |col| allocator.free(col); - cols.deinit(allocator); - } - - while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { - // PRAGMA table_info columns: cid(0), name(1), type(2), notnull(3), dflt_value(4), pk(5) - const ptr = c.sqlite3_column_text(stmt, 1); - if (ptr == null) continue; - const name = std.mem.span(@as([*:0]const u8, @ptrCast(ptr))); - const owned = try allocator.dupe(u8, name); - errdefer allocator.free(owned); - try cols.append(allocator, owned); - } - - return cols.toOwnedSlice(allocator); -} - -/// Print column context to writer after a SQL error. -/// Prints " table \"t\" has columns: ..." and optionally " hint: did you mean \"\"?" 
-/// when the error message matches "no such column: " and a column exists within edit distance 2. -/// Silently returns on any failure (PRAGMA unavailable, OOM, writer error). -fn printSqlErrorContext( - allocator: std.mem.Allocator, - db: *c.sqlite3, - errmsg: []const u8, - writer: *std.Io.Writer, -) void { - const columns = getTableColumns(allocator, db) catch return; - defer { - for (columns) |col| allocator.free(col); - allocator.free(columns); - } - if (columns.len == 0) return; - - writer.writeAll(" table \"t\" has columns: ") catch return; - for (columns, 0..) |col, i| { - if (i > 0) writer.writeAll(", ") catch return; - writer.writeAll(col) catch return; - } - writer.writeByte('\n') catch return; - - // Suggest the closest column when the error is "no such column: " - const no_such_col = "no such column: "; - if (std.mem.find(u8, errmsg, no_such_col)) |start| { - const missing = errmsg[start + no_such_col.len ..]; - var best_col: ?[]const u8 = null; - var best_dist: usize = std.math.maxInt(usize); - for (columns) |col| { - const dist = levenshteinDistance(missing, col); - if (dist < best_dist) { - best_dist = dist; - best_col = col; - } - } - if (best_dist <= 2) { - if (best_col) |col| { - writer.print(" hint: did you mean \"{s}\"?\n", .{col}) catch return; - } - } - } -} - // ─── Entry point ────────────────────────────────────── /// fmtThousands(buf, n) → []const u8 @@ -585,23 +399,6 @@ fn fatal(comptime fmt: []const u8, writer: *std.Io.Writer, code: ExitCode, args: std.process.exit(@intFromEnum(code)); } -/// Print SQL error message with column context then exit with sql_error code. 
-/// Pre: errmsg is the SQLite error string; db has table `t` (or PRAGMA silently fails) -/// Post: stderr has "error: \n" + optional column list + optional hint; process exits 3 -fn fatalSqlWithContext( - allocator: std.mem.Allocator, - db: *c.sqlite3, - errmsg: []const u8, - writer: *std.Io.Writer, -) noreturn { - writer.print("error: {s}\n", .{errmsg}) catch |err| { - std.log.err("failed to write error message: {}", .{err}); - }; - printSqlErrorContext(allocator, db, errmsg, writer); - writer.flush() catch |err| std.log.err("failed to flush: {}", .{err}); - std.process.exit(@intFromEnum(ExitCode.sql_error)); -} - /// loadCsvInput loads all CSV rows from stdin into db table `t`. /// Pre: db is an open in-memory SQLite handle with no tables yet /// parsed.delimiter is valid; allocator and writers are valid @@ -682,19 +479,17 @@ fn loadCsvInput( // ─── Phase 2: create table and insert rows ──────────────────────────────── - createTable(allocator, db, cols, types) catch - fatal("{s}", stderr_writer, .sql_error, .{std.mem.span(c.sqlite3_errmsg(db))}); + sqlite_mod.createTable(allocator, db, cols, types, stderr_writer); { var errmsg: [*c]u8 = null; if (c.sqlite3_exec(db, "BEGIN TRANSACTION", null, null, &errmsg) != c.SQLITE_OK) { const msg = if (errmsg != null) std.mem.span(errmsg) else std.mem.span(c.sqlite3_errmsg(db)); - fatalSqlWithContext(allocator, db, msg, stderr_writer); + sqlite_mod.fatalSqlWithContext(allocator, db, msg, stderr_writer); } } - const stmt = prepareInsert(allocator, db, num_cols) catch - fatalSqlWithContext(allocator, db, std.mem.span(c.sqlite3_errmsg(db)), stderr_writer); + const stmt = sqlite_mod.prepareInsertStmt(allocator, db, num_cols, stderr_writer); defer _ = c.sqlite3_finalize(stmt); const is_tty = std.Io.File.isTty(std.Io.File.stderr(), io) catch false; @@ -708,7 +503,7 @@ fn loadCsvInput( fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, .usage, .{limit}); } insertRowTyped(stmt, db, row, types, @intCast(num_cols)) 
catch - fatalSqlWithContext(allocator, db, std.mem.span(c.sqlite3_errmsg(db)), stderr_writer); + sqlite_mod.fatalSqlWithContext(allocator, db, std.mem.span(c.sqlite3_errmsg(db)), stderr_writer); if (is_tty and rows_inserted % progress_interval == 0) printProgress(stderr_writer, rows_inserted, parsed.max_rows); } @@ -740,7 +535,7 @@ fn loadCsvInput( fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, .usage, .{limit}); } insertRowTyped(stmt, db, record, types, @intCast(num_cols)) catch - fatalSqlWithContext(allocator, db, std.mem.span(c.sqlite3_errmsg(db)), stderr_writer); + sqlite_mod.fatalSqlWithContext(allocator, db, std.mem.span(c.sqlite3_errmsg(db)), stderr_writer); if (is_tty and rows_inserted % progress_interval == 0) printProgress(stderr_writer, rows_inserted, parsed.max_rows); } @@ -750,7 +545,7 @@ fn loadCsvInput( const rc = c.sqlite3_exec(db, "COMMIT", null, null, &errmsg); if (rc != c.SQLITE_OK) { const msg = if (errmsg != null) std.mem.span(errmsg) else std.mem.span(c.sqlite3_errmsg(db)); - fatalSqlWithContext(allocator, db, msg, stderr_writer); + sqlite_mod.fatalSqlWithContext(allocator, db, msg, stderr_writer); } if (errmsg != null) c.sqlite3_free(errmsg); } @@ -1404,8 +1199,7 @@ fn run( ) void { const query = parsed.query; - const db = openDb() catch - fatal("failed to open in-memory database", stderr_writer, .sql_error, .{}); + const db = sqlite_mod.openDb(stderr_writer); defer _ = c.sqlite3_close(db); const start_ts = std.Io.Timestamp.now(io, .awake); @@ -1457,7 +1251,7 @@ fn run( execQuery(allocator, db, query, stdout_writer, parsed.header, parsed.output_format, parsed.xml_root, parsed.xml_row) catch { stdout_writer.flush() catch |err| std.log.err("failed to flush output before fatal: {}", .{err}); - fatalSqlWithContext(allocator, db, std.mem.span(c.sqlite3_errmsg(db)), stderr_writer); + sqlite_mod.fatalSqlWithContext(allocator, db, std.mem.span(c.sqlite3_errmsg(db)), stderr_writer); }; } diff --git a/src/sqlite.zig b/src/sqlite.zig 
/// Inferred SQLite affinity for a CSV column.
pub const ColumnType = enum { TEXT, INTEGER, REAL };

/// openDb(writer) → *sqlite3
/// Pre: —
/// Post: result is an open, empty in-memory SQLite database handle
/// On failure: writes a fatal message to writer and exits the process with exit_sql.
pub fn openDb(writer: *std.Io.Writer) *c.sqlite3 {
    var db: ?*c.sqlite3 = null;
    if (c.sqlite3_open(":memory:", &db) != c.SQLITE_OK)
        fatal("failed to open in-memory database", writer, exit_sql, .{});
    return db.?;
}

/// createTable(allocator, db, cols, types, writer) → void
/// Pre: db is an open SQLite handle
///      cols.len > 0
///      types.len = cols.len
///      allocator is valid
/// Post: table `t` exists in db with cols.len columns named by cols;
///       each column's SQL type reflects its ColumnType value
///       (INTEGER / REAL / TEXT with correct SQLite affinity)
///       column identifiers are double-quote escaped per SQL syntax
/// On failure: writes a fatal message to writer and exits the process.
/// NOTE(review): out-of-memory here exits with exit_parse (2), not exit_sql (3) —
/// this matches the loader's OOM convention, but confirm the exit-code policy.
pub fn createTable(
    allocator: std.mem.Allocator,
    db: *c.sqlite3,
    cols: []const []const u8,
    types: []const ColumnType,
    writer: *std.Io.Writer,
) void {
    var sql: std.ArrayList(u8) = .empty;
    defer sql.deinit(allocator);

    sql.appendSlice(allocator, "CREATE TABLE t (") catch fatal("out of memory", writer, exit_parse, .{});
    // Loop invariant I: sql = "CREATE TABLE t (" ++ columns[0..i] joined by ", "
    // Bounding function: cols.len - i
    for (cols, 0..) |col, i| {
        if (i > 0) sql.appendSlice(allocator, ", ") catch fatal("out of memory", writer, exit_parse, .{});
        sql.append(allocator, '"') catch fatal("out of memory", writer, exit_parse, .{});
        // Escape embedded double-quotes by doubling them (SQL identifier rule)
        for (col) |ch| {
            if (ch == '"') sql.append(allocator, '"') catch fatal("out of memory", writer, exit_parse, .{});
            sql.append(allocator, ch) catch fatal("out of memory", writer, exit_parse, .{});
        }
        sql.append(allocator, '"') catch fatal("out of memory", writer, exit_parse, .{});
        sql.appendSlice(allocator, switch (types[i]) {
            .INTEGER => " INTEGER",
            .REAL => " REAL",
            .TEXT => " TEXT",
        }) catch fatal("out of memory", writer, exit_parse, .{});
    }
    sql.appendSlice(allocator, ")") catch fatal("out of memory", writer, exit_parse, .{});
    sql.append(allocator, 0) catch fatal("out of memory", writer, exit_parse, .{}); // null-terminate for the C API

    var errmsg: [*c]u8 = null;
    if (c.sqlite3_exec(db, sql.items.ptr, null, null, &errmsg) != c.SQLITE_OK) {
        const msg = if (errmsg != null) std.mem.span(errmsg) else std.mem.span(c.sqlite3_errmsg(db));
        if (errmsg != null) c.sqlite3_free(errmsg);
        fatal("{s}", writer, exit_sql, .{msg});
    }
}

/// Compute the Levenshtein edit distance between two strings.
/// Uses two-row DP over at most max_len characters per string.
/// Note: inputs longer than 128 bytes are compared on their first 128 bytes only,
/// so the result is a lower-bound approximation for very long identifiers.
pub fn levenshteinDistance(a: []const u8, b: []const u8) usize {
    const max_len = 128;
    var prev: [max_len + 1]usize = undefined;
    var curr: [max_len + 1]usize = undefined;
    const a_len = @min(a.len, max_len);
    const b_len = @min(b.len, max_len);

    for (0..b_len + 1) |j| prev[j] = j;
    for (0..a_len) |i| {
        curr[0] = i + 1;
        for (0..b_len) |j| {
            const cost: usize = if (a[i] == b[j]) 0 else 1;
            curr[j + 1] = @min(curr[j] + 1, @min(prev[j + 1] + 1, prev[j] + cost));
        }
        @memcpy(prev[0 .. b_len + 1], curr[0 .. b_len + 1]);
    }
    return prev[b_len];
}

/// Return column names of table `t` via PRAGMA table_info.
/// Caller owns the returned slice; free each element and the slice with allocator.
/// Returns empty slice on PRAGMA failure.
pub fn getTableColumns(allocator: std.mem.Allocator, db: *c.sqlite3) ![][]const u8 {
    var stmt: ?*c.sqlite3_stmt = null;
    if (c.sqlite3_prepare_v2(db, "PRAGMA table_info(t)", -1, &stmt, null) != c.SQLITE_OK)
        return &.{};
    defer _ = c.sqlite3_finalize(stmt);

    var cols = std.ArrayList([]const u8).empty;
    errdefer {
        for (cols.items) |col| allocator.free(col);
        cols.deinit(allocator);
    }

    while (c.sqlite3_step(stmt) == c.SQLITE_ROW) {
        // PRAGMA table_info columns: cid(0), name(1), type(2), notnull(3), dflt_value(4), pk(5)
        const ptr = c.sqlite3_column_text(stmt, 1);
        if (ptr == null) continue;
        const name = std.mem.span(@as([*:0]const u8, @ptrCast(ptr)));
        const owned = try allocator.dupe(u8, name);
        errdefer allocator.free(owned);
        try cols.append(allocator, owned);
    }

    return cols.toOwnedSlice(allocator);
}

/// Print column context to writer after a SQL error.
/// Prints "  table \"t\" has columns: ..." and optionally "  hint: did you mean \"...\"?"
/// when the error message matches "no such column: " and a column exists within edit distance 2.
/// Silently returns on any failure (PRAGMA unavailable, OOM, writer error).
pub fn printSqlErrorContext(
    allocator: std.mem.Allocator,
    db: *c.sqlite3,
    errmsg: []const u8,
    writer: *std.Io.Writer,
) void {
    const columns = getTableColumns(allocator, db) catch return;
    defer {
        for (columns) |col| allocator.free(col);
        allocator.free(columns);
    }
    if (columns.len == 0) return;

    writer.writeAll("  table \"t\" has columns: ") catch return;
    for (columns, 0..) |col, i| {
        if (i > 0) writer.writeAll(", ") catch return;
        writer.writeAll(col) catch return;
    }
    writer.writeByte('\n') catch return;

    // Suggest the closest column when the error is "no such column: "
    // NOTE(review): std.mem.find — confirm this helper exists in the targeted Zig
    // std (earlier releases expose this as std.mem.indexOf); the pre-refactor
    // main.zig used the same call, so it presumably builds on this toolchain.
    const no_such_col = "no such column: ";
    if (std.mem.find(u8, errmsg, no_such_col)) |start| {
        const missing = errmsg[start + no_such_col.len ..];
        var best_col: ?[]const u8 = null;
        var best_dist: usize = std.math.maxInt(usize);
        for (columns) |col| {
            const dist = levenshteinDistance(missing, col);
            if (dist < best_dist) {
                best_dist = dist;
                best_col = col;
            }
        }
        if (best_dist <= 2) {
            if (best_col) |col| {
                writer.print("  hint: did you mean \"{s}\"?\n", .{col}) catch return;
            }
        }
    }
}

/// Print SQL error message with column context then exit with sql_error code.
/// Pre: errmsg is the SQLite error string; db has table `t` (or PRAGMA silently fails)
/// Post: stderr has "error: ...\n" + optional column list + optional hint; process exits 3
pub fn fatalSqlWithContext(
    allocator: std.mem.Allocator,
    db: *c.sqlite3,
    errmsg: []const u8,
    writer: *std.Io.Writer,
) noreturn {
    writer.print("error: {s}\n", .{errmsg}) catch |err| {
        std.log.err("failed to write error message: {}", .{err});
    };
    printSqlErrorContext(allocator, db, errmsg, writer);
    writer.flush() catch |err| std.log.err("failed to flush: {}", .{err});
    std.process.exit(exit_sql);
}
//! CSV loader — type inference, header parsing, and loading CSV/TSV into SQLite.

const std = @import("std");
const c = @import("c");
const csv_mod = @import("csv.zig");
const sqlite_mod = @import("sqlite.zig");
const args_mod = @import("args.zig");

const ColumnType = sqlite_mod.ColumnType;
const sqlite_static = sqlite_mod.sqlite_static;

const fatal = sqlite_mod.fatal;
const fatalSqlWithContext = sqlite_mod.fatalSqlWithContext;

/// Number of rows buffered from stdin to infer column types.
pub const inference_buffer_size: usize = 100;

/// Number of rows between progress indicator updates.
pub const progress_interval: usize = 10_000;

/// stripQuotes(raw) → []const u8
/// Pre: raw is a valid UTF-8 slice
/// Post: if raw = '"' ++ inner ++ '"' => result = inner
///       otherwise => result = raw
/// Note: RFC 4180 quoted-field unescaping is handled by csv.zig; this function
/// provides an explicit, single-location implementation for any residual
/// direct string handling that bypasses the CSV parser.
/// NOTE(review): no caller within this module as extracted — confirm it is still
/// referenced after the refactor, or remove it.
fn stripQuotes(raw: []const u8) []const u8 {
    if (raw.len >= 2 and raw[0] == '"' and raw[raw.len - 1] == '"')
        return raw[1 .. raw.len - 1];
    return raw;
}

/// isInteger(val) → bool
/// Pre: val is a valid UTF-8 slice
/// Post: result = val matches [+-]?[0-9]+ (non-empty, only digits after optional sign)
pub fn isInteger(val: []const u8) bool {
    if (val.len == 0) return false;
    var i: usize = 0;
    if (val[0] == '+' or val[0] == '-') i = 1;
    if (i >= val.len) return false; // sign only → not an integer
    // Loop invariant I: val[0..i] is a valid integer prefix (sign + digits)
    // Bounding function: val.len - i
    while (i < val.len) : (i += 1) {
        if (val[i] < '0' or val[i] > '9') return false;
    }
    return true;
}

/// isReal(val) → bool
/// Pre: val is a valid UTF-8 slice
/// Post: result = val is parseable as a 64-bit floating-point number
/// Note: returns true for integers too; callers should check isInteger first
/// for finer classification.
pub fn isReal(val: []const u8) bool {
    if (val.len == 0) return false;
    _ = std.fmt.parseFloat(f64, val) catch return false;
    return true;
}

/// inferTypes(buffer, num_cols, allocator) → []ColumnType
/// Pre: buffer is a slice of rows (each row is a slice of field strings)
///      num_cols > 0; allocator is valid
/// Post: result.len = num_cols
///       result[j] = INTEGER ⟺ all non-empty values in column j are integers
///       result[j] = REAL    ⟺ all non-empty values are numeric but at least one
///                             is not a plain integer
///       result[j] = TEXT    ⟺ at least one non-empty value is non-numeric,
///                             OR no non-empty values exist
/// Caller owns the returned slice; free it with allocator.
pub fn inferTypes(
    allocator: std.mem.Allocator,
    buffer: []const [][]u8,
    num_cols: usize,
) std.mem.Allocator.Error![]ColumnType {
    const types = try allocator.alloc(ColumnType, num_cols);
    errdefer allocator.free(types);

    const can_be_integer = try allocator.alloc(bool, num_cols);
    defer allocator.free(can_be_integer);
    const can_be_real = try allocator.alloc(bool, num_cols);
    defer allocator.free(can_be_real);
    const has_data = try allocator.alloc(bool, num_cols);
    defer allocator.free(has_data);

    // Initialise: optimistically assume every column can be INTEGER
    for (0..num_cols) |j| {
        can_be_integer[j] = true;
        can_be_real[j] = true;
        has_data[j] = false;
    }

    // Loop invariant I: for each j in 0..num_cols,
    //   can_be_integer[j] = true ⟺ all non-empty values in column j seen so far are integers
    //   can_be_real[j] = true ⟺ all non-empty values in column j seen so far are numeric
    //   has_data[j] = true ⟺ at least one non-empty value has been seen in column j
    // Bounding function: buffer.len - row_idx
    for (buffer) |row| {
        for (row, 0..) |val, j| {
            if (j >= num_cols) break; // extra fields beyond the header are ignored here
            if (val.len == 0) continue; // NULL/empty → skip, does not affect inference
            has_data[j] = true;
            if (!can_be_real[j]) continue; // already TEXT, no need to re-check
            if (!isReal(val)) {
                can_be_real[j] = false;
                can_be_integer[j] = false;
            } else if (!isInteger(val)) {
                can_be_integer[j] = false;
            }
        }
    }

    // Determine final type per column
    // Post: types[j] reflects can_be_integer[j] / can_be_real[j] / has_data[j]
    for (0..num_cols) |j| {
        if (has_data[j] and can_be_integer[j]) {
            types[j] = .INTEGER;
        } else if (has_data[j] and can_be_real[j]) {
            types[j] = .REAL;
        } else {
            types[j] = .TEXT;
        }
    }

    return types;
}

/// parseHeader(record, allocator, stderr_writer) → [][]const u8
/// Pre: record is a non-null CSV record (slice of owned UTF-8 field slices)
///      allocator is valid
///      stderr_writer is a valid writer (warnings are best-effort; write errors ignored)
/// Post: result is a non-empty slice of trimmed column names (leading/trailing
///       ASCII whitespace removed); UTF-8 BOM stripped from the first field
///       duplicate names are suffixed (_2, _3, …) and a warning is written to
///       stderr for each rename: `warning: duplicate column "..." renamed to "..."`
///       error.EmptyColumnName when any trimmed name is empty
///       error.NoColumns when record is empty
/// NOTE(review): `seen` only counts base names, so a generated suffix (e.g. "a_2")
/// can still collide with a column literally named "a_2" — confirm acceptable.
/// Note: on BOM strip, record[0] is freed and replaced with a new allocation; the
/// caller's later freeRecord(record) then frees the replacement, which is safe.
pub fn parseHeader(
    allocator: std.mem.Allocator,
    record: [][]u8,
    stderr_writer: *std.Io.Writer,
) (args_mod.SqlPipeError || std.mem.Allocator.Error)![][]const u8 {
    if (record.len == 0) return error.NoColumns;

    // Strip UTF-8 BOM (\xEF\xBB\xBF) from first field if present
    const bom = "\xEF\xBB\xBF";
    if (std.mem.startsWith(u8, record[0], bom)) {
        const without_bom = try allocator.dupe(u8, record[0][bom.len..]);
        allocator.free(record[0]);
        record[0] = without_bom;
    }

    var cols: std.ArrayList([]const u8) = .empty;
    errdefer {
        for (cols.items) |col| allocator.free(col);
        cols.deinit(allocator);
    }

    // seen: maps a column name to the number of times it has appeared so far.
    // Pre: seen is empty
    // Post: seen[name] = count of occurrences in record[0..i]
    var seen = std.StringHashMap(usize).init(allocator);
    defer seen.deinit();

    // Loop invariant I: cols contains trimmed, non-empty (possibly suffixed) names for record[0..i]
    //                   seen maps each base name to its occurrence count up to i
    //                   all items in cols are heap-allocated (owned by allocator)
    // Bounding function: record.len - i (natural, decreasing, lower-bounded by 0)
    for (record) |field| {
        const base = std.mem.trim(u8, field, " \t\r");
        if (base.len == 0) return error.EmptyColumnName;

        const count = (seen.get(base) orelse 0) + 1;
        try seen.put(base, count);

        const col: []const u8 = if (count == 1)
            try allocator.dupe(u8, base)
        else blk: {
            const renamed = try std.fmt.allocPrint(allocator, "{s}_{d}", .{ base, count });
            // Best-effort warning to stderr; write errors are silently ignored
            stderr_writer.print("warning: duplicate column \"{s}\" renamed to \"{s}\"\n", .{ base, renamed }) catch |err| {
                std.log.err("failed to write warning: {}", .{err});
            };
            break :blk renamed;
        };

        try cols.append(allocator, col);
    }

    return cols.toOwnedSlice(allocator);
}

/// insertRowTyped(stmt, db, row, types, param_count) → void
/// Pre: stmt is a prepared INSERT with param_count parameters, freshly reset
///      row is a non-empty CSV record (slice of field slices)
///      types.len = param_count (or shorter → remaining treated as TEXT)
///      db is the database that owns stmt (used for error reporting by caller)
/// Post: each field is bound to its parameter using the appropriate SQLite bind
///       function according to types[j]:
///         INTEGER → sqlite3_bind_int64 (fallback: TEXT on parse failure)
///         REAL    → sqlite3_bind_double (fallback: TEXT on parse failure)
///         TEXT    → sqlite3_bind_text
///         empty / missing values → sqlite3_bind_null
///       sqlite3_step returned SQLITE_DONE
///       error.BindFailed / error.StepFailed on SQLite errors
pub fn insertRowTyped(
    stmt: *c.sqlite3_stmt,
    db: *c.sqlite3,
    row: [][]u8,
    types: []const ColumnType,
    param_count: c_int,
) args_mod.SqlPipeError!void {
    // db is kept in the signature so callers can report errors against the
    // owning connection; it is intentionally unused inside this function.
    _ = db;

    _ = c.sqlite3_reset(stmt);
    _ = c.sqlite3_clear_bindings(stmt);

    var col_idx: c_int = 1;

    // Loop invariant I: row[0..col_idx-1] are bound to params 1..col_idx-1
    //                   using the appropriate SQLite bind function for each column type.
    // Bounding function: row.len + 1 - col_idx (decreasing toward 0)
    for (row) |val| {
        if (col_idx > param_count) break; // extra fields beyond the header are dropped
        const j: usize = @intCast(col_idx - 1);
        const col_type: ColumnType = if (j < types.len) types[j] else .TEXT;

        if (val.len == 0) {
            // Empty / NULL value → bind as SQL NULL regardless of column type
            if (c.sqlite3_bind_null(stmt, col_idx) != c.SQLITE_OK)
                return error.BindFailed;
        } else switch (col_type) {
            .INTEGER => {
                if (std.fmt.parseInt(i64, val, 10)) |n| {
                    if (c.sqlite3_bind_int64(stmt, col_idx, n) != c.SQLITE_OK)
                        return error.BindFailed;
                } else |_| {
                    // Parse failure: fall back to text binding
                    if (c.sqlite3_bind_text(stmt, col_idx, val.ptr, @intCast(val.len), sqlite_static) != c.SQLITE_OK)
                        return error.BindFailed;
                }
            },
            .REAL => {
                if (std.fmt.parseFloat(f64, val)) |f| {
                    if (c.sqlite3_bind_double(stmt, col_idx, f) != c.SQLITE_OK)
                        return error.BindFailed;
                } else |_| {
                    if (c.sqlite3_bind_text(stmt, col_idx, val.ptr, @intCast(val.len), sqlite_static) != c.SQLITE_OK)
                        return error.BindFailed;
                }
            },
            .TEXT => {
                if (c.sqlite3_bind_text(stmt, col_idx, val.ptr, @intCast(val.len), sqlite_static) != c.SQLITE_OK)
                    return error.BindFailed;
            },
        }
        col_idx += 1;
    }

    // Bind NULL for any trailing columns the row is short of
    // Loop invariant: params 1..col_idx-1 are bound; col_idx..param_count become NULL
    while (col_idx <= param_count) : (col_idx += 1) {
        if (c.sqlite3_bind_null(stmt, col_idx) != c.SQLITE_OK)
            return error.BindFailed;
    }

    if (c.sqlite3_step(stmt) != c.SQLITE_DONE) return error.StepFailed;
}

/// fmtThousands(buf, n) → []const u8
/// Pre: buf.len >= 26 (accommodates any usize value with thousands separators)
/// Post: n is formatted as a decimal string with ',' separating each group of
///       three digits from the right (e.g. 42317 → "42,317", 1000 → "1,000")
pub fn fmtThousands(buf: []u8, n: usize) []const u8 {
    var tmp: [32]u8 = undefined; // 20 digits max (u64) + safety margin
    const digits = std.fmt.bufPrint(&tmp, "{d}", .{n}) catch unreachable;
    const len = digits.len;
    const first_group = len % 3; // digits in the leading group (0 means groups of 3 from start)
    var out_len: usize = 0;
    // Loop invariant I: buf[0..out_len] = formatted prefix of digits[0..i]
    //                   commas inserted before every third digit counted from the right
    // Bounding function: len - i
    for (digits, 0..) |ch, i| {
        if ((i > 0 and i == first_group) or
            (i > first_group and (i - first_group) % 3 == 0))
        {
            buf[out_len] = ',';
            out_len += 1;
        }
        buf[out_len] = ch;
        out_len += 1;
    }
    return buf[0..out_len];
}

/// printProgress(writer, n, max_rows) → void
/// Pre: writer is stderr; n > 0
/// Post: "Loading... <n> rows\r" (or "Loading... <n> / <max> rows\r" when max_rows is set)
///       written to writer with carriage return for in-place update; flushed immediately
pub fn printProgress(writer: *std.Io.Writer, n: usize, max_rows: ?usize) void {
    var count_buf: [32]u8 = undefined;
    const count_str = fmtThousands(&count_buf, n);
    if (max_rows) |limit| {
        var limit_buf: [32]u8 = undefined;
        const limit_str = fmtThousands(&limit_buf, limit);
        writer.print("Loading... {s} / {s} rows\r", .{ count_str, limit_str }) catch |err| {
            std.log.err("failed to write progress: {}", .{err});
        };
    } else {
        writer.print("Loading... {s} rows\r", .{count_str}) catch |err| {
            std.log.err("failed to write progress: {}", .{err});
        };
    }
    writer.flush() catch |err| std.log.err("failed to flush progress: {}", .{err});
}

/// loadCsvInput loads all CSV rows from stdin into db table `t`.
/// Pre: db is an open in-memory SQLite handle with no tables yet
///      parsed.delimiter is valid; allocator and writers are valid
/// Post: table `t` exists in db with columns inferred from the CSV header;
///       all CSV rows have been inserted; transaction has been committed
///       returns rows_inserted (data rows only, header not counted)
///       on error: writes message to stderr_writer and exits with appropriate code
pub fn loadCsvInput(
    allocator: std.mem.Allocator,
    io: std.Io,
    db: *c.sqlite3,
    parsed: args_mod.ParsedArgs,
    stderr_writer: *std.Io.Writer,
) usize {
    var stdin_buf: [4096]u8 = undefined;
    var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf);
    var csv_reader = csv_mod.csvReaderWithDelimiter(allocator, &stdin_file_reader.interface, parsed.delimiter);

    const header_record = csv_reader.nextRecord() catch |err| switch (err) {
        error.UnterminatedQuotedField => fatal("row 1: unterminated quoted field", stderr_writer, sqlite_mod.exit_parse, .{}),
        else => fatal("row 1: failed to parse CSV header", stderr_writer, sqlite_mod.exit_parse, .{}),
    } orelse fatal("empty input (no header row)", stderr_writer, sqlite_mod.exit_parse, .{});
    defer csv_reader.freeRecord(header_record);

    const cols = parseHeader(allocator, header_record, stderr_writer) catch |err| switch (err) {
        error.EmptyColumnName => fatal("row 1: empty column name in header", stderr_writer, sqlite_mod.exit_parse, .{}),
        error.NoColumns => fatal("row 1: no columns found in header", stderr_writer, sqlite_mod.exit_parse, .{}),
        else => fatal("row 1: failed to parse header", stderr_writer, sqlite_mod.exit_parse, .{}),
    };
    defer {
        for (cols) |col| allocator.free(col);
        allocator.free(cols);
    }

    const num_cols = cols.len;
    var csv_row_count: usize = 1; // 1 = header already read

    // ─── Phase 1: determine column types ─────────────────────────────────────
    var row_buffer: std.ArrayList([][]u8) = .empty;
    defer {
        for (row_buffer.items) |row| csv_reader.freeRecord(row);
        row_buffer.deinit(allocator);
    }

    const types: []ColumnType = if (parsed.type_inference) blk: {
        // Buffer up to inference_buffer_size non-empty rows, then infer types.
        while (row_buffer.items.len < inference_buffer_size) {
            const rec = csv_reader.nextRecord() catch |err| switch (err) {
                error.UnterminatedQuotedField => fatal(
                    "row {d}: unterminated quoted field",
                    stderr_writer,
                    sqlite_mod.exit_parse,
                    .{csv_row_count + 1},
                ),
                else => fatal(
                    "row {d}: failed to parse CSV",
                    stderr_writer,
                    sqlite_mod.exit_parse,
                    .{csv_row_count + 1},
                ),
            } orelse break;
            csv_row_count += 1;
            if (rec.len == 0) {
                csv_reader.freeRecord(rec);
                continue;
            }
            row_buffer.append(allocator, rec) catch
                fatal("out of memory while buffering rows", stderr_writer, sqlite_mod.exit_parse, .{});
        }
        break :blk inferTypes(allocator, row_buffer.items, num_cols) catch
            fatal("out of memory during type inference", stderr_writer, sqlite_mod.exit_parse, .{});
    } else blk: {
        // --no-type-inference: every column is TEXT
        const t = allocator.alloc(ColumnType, num_cols) catch
            fatal("out of memory", stderr_writer, sqlite_mod.exit_parse, .{});
        @memset(t, .TEXT);
        break :blk t;
    };
    defer allocator.free(types);

    // ─── Phase 2: create table and insert rows ────────────────────────────────

    sqlite_mod.createTable(allocator, db, cols, types, stderr_writer);

    {
        var errmsg: [*c]u8 = null;
        if (c.sqlite3_exec(db, "BEGIN TRANSACTION", null, null, &errmsg) != c.SQLITE_OK) {
            const msg = if (errmsg != null) std.mem.span(errmsg) else std.mem.span(c.sqlite3_errmsg(db));
            fatalSqlWithContext(allocator, db, msg, stderr_writer);
        }
    }

    const stmt = sqlite_mod.prepareInsertStmt(allocator, db, num_cols, stderr_writer);
    defer _ = c.sqlite3_finalize(stmt);

    const is_tty = std.Io.File.isTty(std.Io.File.stderr(), io) catch false;
    var rows_inserted: usize = 0;

    // Insert buffered rows
    for (row_buffer.items) |row| {
        rows_inserted += 1;
        if (parsed.max_rows) |limit| {
            if (rows_inserted > limit)
                fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, sqlite_mod.exit_usage, .{limit});
        }
        insertRowTyped(stmt, db, row, types, @intCast(num_cols)) catch
            fatalSqlWithContext(allocator, db, std.mem.span(c.sqlite3_errmsg(db)), stderr_writer);
        if (is_tty and rows_inserted % progress_interval == 0)
            printProgress(stderr_writer, rows_inserted, parsed.max_rows);
    }

    // Stream remaining rows from stdin
    while (true) {
        const record = csv_reader.nextRecord() catch |err| switch (err) {
            error.UnterminatedQuotedField => fatal(
                "row {d}: unterminated quoted field",
                stderr_writer,
                sqlite_mod.exit_parse,
                .{csv_row_count + 1},
            ),
            else => fatal(
                "row {d}: failed to parse CSV",
                stderr_writer,
                sqlite_mod.exit_parse,
                .{csv_row_count + 1},
            ),
        } orelse break;
        csv_row_count += 1;
        defer csv_reader.freeRecord(record);

        if (record.len == 0) continue;

        rows_inserted += 1;
        if (parsed.max_rows) |limit| {
            if (rows_inserted > limit)
                fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, sqlite_mod.exit_usage, .{limit});
        }
        insertRowTyped(stmt, db, record, types, @intCast(num_cols)) catch
            fatalSqlWithContext(allocator, db, std.mem.span(c.sqlite3_errmsg(db)), stderr_writer);
        if (is_tty and rows_inserted % progress_interval == 0)
            printProgress(stderr_writer, rows_inserted, parsed.max_rows);
    }

    {
        var errmsg: [*c]u8 = null;
        const rc = c.sqlite3_exec(db, "COMMIT", null, null, &errmsg);
        if (rc != c.SQLITE_OK) {
            const msg = if (errmsg != null) std.mem.span(errmsg) else std.mem.span(c.sqlite3_errmsg(db));
            fatalSqlWithContext(allocator, db, msg, stderr_writer);
        }
        if (errmsg != null) c.sqlite3_free(errmsg);
    }

    return rows_inserted;
}
loader = @import("loader.zig"); + const ColumnType = sqlite_mod.ColumnType; -const sqlite_static = sqlite_mod.sqlite_static; const VERSION: []const u8 = build_options.version; @@ -21,13 +22,14 @@ const ArgsResult = args_mod.ArgsResult; const parseArgs = args_mod.parseArgs; const printUsage = args_mod.printUsage; -// ─── Column type inference ──────────────────────────── - -/// Number of rows buffered from stdin to infer column types. -const inference_buffer_size: usize = 100; - -/// Number of rows between progress indicator updates. -const progress_interval: usize = 10_000; +const inferTypes = loader.inferTypes; +const parseHeader = loader.parseHeader; +const insertRowTyped = loader.insertRowTyped; +const fmtThousands = loader.fmtThousands; +const printProgress = loader.printProgress; +const loadCsvInput = loader.loadCsvInput; +const inference_buffer_size = loader.inference_buffer_size; +const progress_interval = loader.progress_interval; /// Structured exit codes for scripting. /// 0 = success @@ -47,254 +49,6 @@ const InputFormat = format.InputFormat; /// Supported output formats (canonical definition lives in format.zig). const OutputFormat = format.OutputFormat; -/// stripQuotes(raw) → []const u8 -/// Pre: raw is a valid UTF-8 slice -/// Post: if raw = '"' ++ inner ++ '"' => result = inner -/// otherwise => result = raw -/// Note: RFC 4180 quoted-field unescaping is handled by csv.zig; this function -/// provides an explicit, single-location implementation for any residual -/// direct string handling that bypasses the CSV parser. -fn stripQuotes(raw: []const u8) []const u8 { - if (raw.len >= 2 and raw[0] == '"' and raw[raw.len - 1] == '"') - return raw[1 .. 
raw.len - 1]; - return raw; -} - -/// isInteger(val) → bool -/// Pre: val is a valid UTF-8 slice -/// Post: result = val matches [+-]?[0-9]+ (non-empty, only digits after optional sign) -fn isInteger(val: []const u8) bool { - if (val.len == 0) return false; - var i: usize = 0; - if (val[0] == '+' or val[0] == '-') i = 1; - if (i >= val.len) return false; // sign only → not an integer - // Loop invariant I: val[0..i] is a valid integer prefix (sign + digits) - // Bounding function: val.len - i - while (i < val.len) : (i += 1) { - if (val[i] < '0' or val[i] > '9') return false; - } - return true; -} - -/// isReal(val) → bool -/// Pre: val is a valid UTF-8 slice -/// Post: result = val is parseable as a 64-bit floating-point number -/// Note: returns true for integers too; callers should check isInteger first -/// for finer classification. -fn isReal(val: []const u8) bool { - if (val.len == 0) return false; - _ = std.fmt.parseFloat(f64, val) catch return false; - return true; -} - -/// inferTypes(buffer, num_cols, allocator) → []ColumnType -/// Pre: buffer is a slice of rows (each row is a slice of field strings) -/// num_cols > 0; allocator is valid -/// Post: result.len = num_cols -/// result[j] = INTEGER ⟺ all non-empty values in column j are integers -/// result[j] = REAL ⟺ all non-empty values are numeric but at least one -/// is not a plain integer -/// result[j] = TEXT ⟺ at least one non-empty value is non-numeric, -/// OR no non-empty values exist -fn inferTypes( - allocator: std.mem.Allocator, - buffer: []const [][]u8, - num_cols: usize, -) std.mem.Allocator.Error![]ColumnType { - const types = try allocator.alloc(ColumnType, num_cols); - errdefer allocator.free(types); - - const can_be_integer = try allocator.alloc(bool, num_cols); - defer allocator.free(can_be_integer); - const can_be_real = try allocator.alloc(bool, num_cols); - defer allocator.free(can_be_real); - const has_data = try allocator.alloc(bool, num_cols); - defer allocator.free(has_data); - - 
// Initialise: optimistically assume every column can be INTEGER - for (0..num_cols) |j| { - can_be_integer[j] = true; - can_be_real[j] = true; - has_data[j] = false; - } - - // Loop invariant I: for each j in 0..num_cols, - // can_be_integer[j] = true ⟺ all non-empty values in column j seen so far are integers - // can_be_real[j] = true ⟺ all non-empty values in column j seen so far are numeric - // has_data[j] = true ⟺ at least one non-empty value has been seen in column j - // Bounding function: buffer.len - row_idx - for (buffer) |row| { - for (row, 0..) |val, j| { - if (j >= num_cols) break; - if (val.len == 0) continue; // NULL/empty → skip, does not affect inference - has_data[j] = true; - if (!can_be_real[j]) continue; // already TEXT, no need to re-check - if (!isReal(val)) { - can_be_real[j] = false; - can_be_integer[j] = false; - } else if (!isInteger(val)) { - can_be_integer[j] = false; - } - } - } - - // Determine final type per column - // Post: types[j] reflects can_be_integer[j] / can_be_real[j] / has_data[j] - for (0..num_cols) |j| { - if (has_data[j] and can_be_integer[j]) { - types[j] = .INTEGER; - } else if (has_data[j] and can_be_real[j]) { - types[j] = .REAL; - } else { - types[j] = .TEXT; - } - } - - return types; -} - -/// parseHeader(record, allocator, stderr_writer) → [][]const u8 -/// Pre: record is a non-null CSV record (slice of owned UTF-8 field slices) -/// allocator is valid -/// stderr_writer is a valid writer (warnings are best-effort; write errors ignored) -/// Post: result is a non-empty slice of trimmed column names (leading/trailing -/// ASCII whitespace removed); UTF-8 BOM stripped from the first field -/// duplicate names are suffixed (_2, _3, …) and a warning is written to -/// stderr for each rename: `warning: duplicate column "" renamed to ""` -/// error.EmptyColumnName when any trimmed name is empty -/// error.NoColumns when record is empty -fn parseHeader( - allocator: std.mem.Allocator, - record: [][]u8, - 
stderr_writer: *std.Io.Writer, -) (SqlPipeError || std.mem.Allocator.Error)![][]const u8 { - if (record.len == 0) return error.NoColumns; - - // Strip UTF-8 BOM (\xEF\xBB\xBF) from first field if present - const bom = "\xEF\xBB\xBF"; - if (std.mem.startsWith(u8, record[0], bom)) { - const without_bom = try allocator.dupe(u8, record[0][bom.len..]); - allocator.free(record[0]); - record[0] = without_bom; - } - - var cols: std.ArrayList([]const u8) = .empty; - errdefer { - for (cols.items) |col| allocator.free(col); - cols.deinit(allocator); - } - - // seen: maps a column name to the number of times it has appeared so far. - // Pre: seen is empty - // Post: seen[name] = count of occurrences in record[0..i] - var seen = std.StringHashMap(usize).init(allocator); - defer seen.deinit(); - - // Loop invariant I: cols contains trimmed, non-empty (possibly suffixed) names for record[0..i] - // seen maps each base name to its occurrence count up to i - // all items in cols are heap-allocated (owned by allocator) - // Bounding function: record.len - i (natural, decreasing, lower-bounded by 0) - for (record) |field| { - const base = std.mem.trim(u8, field, " \t\r"); - if (base.len == 0) return error.EmptyColumnName; - - const count = (seen.get(base) orelse 0) + 1; - try seen.put(base, count); - - const col: []const u8 = if (count == 1) - try allocator.dupe(u8, base) - else blk: { - const renamed = try std.fmt.allocPrint(allocator, "{s}_{d}", .{ base, count }); - // Best-effort warning to stderr; write errors are silently ignored - stderr_writer.print("warning: duplicate column \"{s}\" renamed to \"{s}\"\n", .{ base, renamed }) catch |err| { - std.log.err("failed to write warning: {}", .{err}); - }; - break :blk renamed; - }; - - try cols.append(allocator, col); - } - - return cols.toOwnedSlice(allocator); -} - -/// insertRowTyped(stmt, db, row, types, param_count) → void -/// Pre: stmt is a prepared INSERT with param_count parameters, freshly reset -/// row is a non-empty CSV 
record (slice of field slices) -/// types.len = param_count (or shorter → remaining treated as TEXT) -/// db is the database that owns stmt (used for error reporting by caller) -/// Post: each field is bound to its parameter using the appropriate SQLite bind -/// function according to types[j]: -/// INTEGER → sqlite3_bind_int64 (fallback: TEXT on parse failure) -/// REAL → sqlite3_bind_double (fallback: TEXT on parse failure) -/// TEXT → sqlite3_bind_text -/// empty / missing values → sqlite3_bind_null -/// sqlite3_step returned SQLITE_DONE -/// error.BindFailed / error.StepFailed on SQLite errors -fn insertRowTyped( - stmt: *c.sqlite3_stmt, - db: *c.sqlite3, - row: [][]u8, - types: []const ColumnType, - param_count: c_int, -) SqlPipeError!void { - _ = db; - - _ = c.sqlite3_reset(stmt); - _ = c.sqlite3_clear_bindings(stmt); - - var col_idx: c_int = 1; - - // Loop invariant I: row[0..col_idx-1] are bound to params 1..col_idx-1 - // using the appropriate SQLite bind function for each column type. 
- // Bounding function: row.len + 1 - col_idx (decreasing toward 0) - for (row) |val| { - if (col_idx > param_count) break; - const j: usize = @intCast(col_idx - 1); - const col_type: ColumnType = if (j < types.len) types[j] else .TEXT; - - if (val.len == 0) { - // Empty / NULL value → bind as SQL NULL regardless of column type - if (c.sqlite3_bind_null(stmt, col_idx) != c.SQLITE_OK) - return error.BindFailed; - } else switch (col_type) { - .INTEGER => { - if (std.fmt.parseInt(i64, val, 10)) |n| { - if (c.sqlite3_bind_int64(stmt, col_idx, n) != c.SQLITE_OK) - return error.BindFailed; - } else |_| { - // Parse failure: fall back to text binding - if (c.sqlite3_bind_text(stmt, col_idx, val.ptr, @intCast(val.len), sqlite_static) != c.SQLITE_OK) - return error.BindFailed; - } - }, - .REAL => { - if (std.fmt.parseFloat(f64, val)) |f| { - if (c.sqlite3_bind_double(stmt, col_idx, f) != c.SQLITE_OK) - return error.BindFailed; - } else |_| { - if (c.sqlite3_bind_text(stmt, col_idx, val.ptr, @intCast(val.len), sqlite_static) != c.SQLITE_OK) - return error.BindFailed; - } - }, - .TEXT => { - if (c.sqlite3_bind_text(stmt, col_idx, val.ptr, @intCast(val.len), sqlite_static) != c.SQLITE_OK) - return error.BindFailed; - }, - } - col_idx += 1; - } - - // Bind NULL for any trailing columns the row is short of - // Loop invariant: params 1..col_idx-1 are bound; col_idx..param_count become NULL - while (col_idx <= param_count) : (col_idx += 1) { - if (c.sqlite3_bind_null(stmt, col_idx) != c.SQLITE_OK) - return error.BindFailed; - } - - if (c.sqlite3_step(stmt) != c.SQLITE_DONE) return error.StepFailed; -} - /// execQuery(db, query, allocator, writer, header, output_format) → !void /// Pre: db is open with table `t` populated /// query is a valid SQL string (not null-terminated) @@ -339,55 +93,6 @@ fn execQuery( try out_writer.end(writer); } -// ─── Entry point ────────────────────────────────────── - -/// fmtThousands(buf, n) → []const u8 -/// Pre: buf.len >= 26 (accommodates any 
usize value with thousands separators) -/// Post: n is formatted as a decimal string with ',' separating each group of -/// three digits from the right (e.g. 42317 → "42,317", 1000 → "1,000") -fn fmtThousands(buf: []u8, n: usize) []const u8 { - var tmp: [32]u8 = undefined; // 20 digits max (u64) + safety margin - const digits = std.fmt.bufPrint(&tmp, "{d}", .{n}) catch unreachable; - const len = digits.len; - const first_group = len % 3; // digits in the leading group (0 means groups of 3 from start) - var out_len: usize = 0; - // Loop invariant I: buf[0..out_len] = formatted prefix of digits[0..i] - // commas inserted before every third digit counted from the right - // Bounding function: len - i - for (digits, 0..) |ch, i| { - if ((i > 0 and i == first_group) or - (i > first_group and (i - first_group) % 3 == 0)) - { - buf[out_len] = ','; - out_len += 1; - } - buf[out_len] = ch; - out_len += 1; - } - return buf[0..out_len]; -} - -/// printProgress(writer, n, max_rows) → void -/// Pre: writer is stderr; n > 0 -/// Post: "Loading... rows\r" (or "Loading... / rows\r" when max_rows is set) -/// written to writer with carriage return for in-place update; flushed immediately -fn printProgress(writer: *std.Io.Writer, n: usize, max_rows: ?usize) void { - var count_buf: [32]u8 = undefined; - const count_str = fmtThousands(&count_buf, n); - if (max_rows) |limit| { - var limit_buf: [32]u8 = undefined; - const limit_str = fmtThousands(&limit_buf, limit); - writer.print("Loading... {s} / {s} rows\r", .{ count_str, limit_str }) catch |err| { - std.log.err("failed to write progress: {}", .{err}); - }; - } else { - writer.print("Loading... 
{s} rows\r", .{count_str}) catch |err| { - std.log.err("failed to write progress: {}", .{err}); - }; - } - writer.flush() catch |err| std.log.err("failed to flush progress: {}", .{err}); -} - /// fatal(writer, code, comptime fmt, args) → noreturn /// Pre: writer is stderr, code is non-zero ExitCode /// Post: "error: \n" written to stderr, process exits with code @@ -399,160 +104,6 @@ fn fatal(comptime fmt: []const u8, writer: *std.Io.Writer, code: ExitCode, args: std.process.exit(@intFromEnum(code)); } -/// loadCsvInput loads all CSV rows from stdin into db table `t`. -/// Pre: db is an open in-memory SQLite handle with no tables yet -/// parsed.delimiter is valid; allocator and writers are valid -/// Post: table `t` exists in db with columns inferred from the CSV header; -/// all CSV rows have been inserted; transaction has been committed -/// returns rows_inserted (data rows only, header not counted) -/// on error: writes message to stderr_writer and exits with appropriate code -fn loadCsvInput( - allocator: std.mem.Allocator, - io: std.Io, - db: *c.sqlite3, - parsed: ParsedArgs, - stderr_writer: *std.Io.Writer, -) usize { - var stdin_buf: [4096]u8 = undefined; - var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); - var csv_reader = csv.csvReaderWithDelimiter(allocator, &stdin_file_reader.interface, parsed.delimiter); - - const header_record = csv_reader.nextRecord() catch |err| switch (err) { - error.UnterminatedQuotedField => fatal("row 1: unterminated quoted field", stderr_writer, .csv_error, .{}), - else => fatal("row 1: failed to parse CSV header", stderr_writer, .csv_error, .{}), - } orelse fatal("empty input (no header row)", stderr_writer, .csv_error, .{}); - defer csv_reader.freeRecord(header_record); - - const cols = parseHeader(allocator, header_record, stderr_writer) catch |err| switch (err) { - error.EmptyColumnName => fatal("row 1: empty column name in header", stderr_writer, .csv_error, .{}), - error.NoColumns => 
fatal("row 1: no columns found in header", stderr_writer, .csv_error, .{}), - else => fatal("row 1: failed to parse header", stderr_writer, .csv_error, .{}), - }; - defer { - for (cols) |col| allocator.free(col); - allocator.free(cols); - } - - const num_cols = cols.len; - var csv_row_count: usize = 1; // 1 = header already read - - // ─── Phase 1: determine column types ───────────────────────────────────── - var row_buffer: std.ArrayList([][]u8) = .empty; - defer { - for (row_buffer.items) |row| csv_reader.freeRecord(row); - row_buffer.deinit(allocator); - } - - const types: []ColumnType = if (parsed.type_inference) blk: { - while (row_buffer.items.len < inference_buffer_size) { - const rec = csv_reader.nextRecord() catch |err| switch (err) { - error.UnterminatedQuotedField => fatal( - "row {d}: unterminated quoted field", - stderr_writer, - .csv_error, - .{csv_row_count + 1}, - ), - else => fatal( - "row {d}: failed to parse CSV", - stderr_writer, - .csv_error, - .{csv_row_count + 1}, - ), - } orelse break; - csv_row_count += 1; - if (rec.len == 0) { - csv_reader.freeRecord(rec); - continue; - } - row_buffer.append(allocator, rec) catch - fatal("out of memory while buffering rows", stderr_writer, .csv_error, .{}); - } - break :blk inferTypes(allocator, row_buffer.items, num_cols) catch - fatal("out of memory during type inference", stderr_writer, .csv_error, .{}); - } else blk: { - const t = allocator.alloc(ColumnType, num_cols) catch - fatal("out of memory", stderr_writer, .csv_error, .{}); - @memset(t, .TEXT); - break :blk t; - }; - defer allocator.free(types); - - // ─── Phase 2: create table and insert rows ──────────────────────────────── - - sqlite_mod.createTable(allocator, db, cols, types, stderr_writer); - - { - var errmsg: [*c]u8 = null; - if (c.sqlite3_exec(db, "BEGIN TRANSACTION", null, null, &errmsg) != c.SQLITE_OK) { - const msg = if (errmsg != null) std.mem.span(errmsg) else std.mem.span(c.sqlite3_errmsg(db)); - 
sqlite_mod.fatalSqlWithContext(allocator, db, msg, stderr_writer); - } - } - - const stmt = sqlite_mod.prepareInsertStmt(allocator, db, num_cols, stderr_writer); - defer _ = c.sqlite3_finalize(stmt); - - const is_tty = std.Io.File.isTty(std.Io.File.stderr(), io) catch false; - var rows_inserted: usize = 0; - - // Insert buffered rows - for (row_buffer.items) |row| { - rows_inserted += 1; - if (parsed.max_rows) |limit| { - if (rows_inserted > limit) - fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, .usage, .{limit}); - } - insertRowTyped(stmt, db, row, types, @intCast(num_cols)) catch - sqlite_mod.fatalSqlWithContext(allocator, db, std.mem.span(c.sqlite3_errmsg(db)), stderr_writer); - if (is_tty and rows_inserted % progress_interval == 0) - printProgress(stderr_writer, rows_inserted, parsed.max_rows); - } - - // Stream remaining rows from stdin - while (true) { - const record = csv_reader.nextRecord() catch |err| switch (err) { - error.UnterminatedQuotedField => fatal( - "row {d}: unterminated quoted field", - stderr_writer, - .csv_error, - .{csv_row_count + 1}, - ), - else => fatal( - "row {d}: failed to parse CSV", - stderr_writer, - .csv_error, - .{csv_row_count + 1}, - ), - } orelse break; - csv_row_count += 1; - defer csv_reader.freeRecord(record); - - if (record.len == 0) continue; - - rows_inserted += 1; - if (parsed.max_rows) |limit| { - if (rows_inserted > limit) - fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, .usage, .{limit}); - } - insertRowTyped(stmt, db, record, types, @intCast(num_cols)) catch - sqlite_mod.fatalSqlWithContext(allocator, db, std.mem.span(c.sqlite3_errmsg(db)), stderr_writer); - if (is_tty and rows_inserted % progress_interval == 0) - printProgress(stderr_writer, rows_inserted, parsed.max_rows); - } - - { - var errmsg: [*c]u8 = null; - const rc = c.sqlite3_exec(db, "COMMIT", null, null, &errmsg); - if (rc != c.SQLITE_OK) { - const msg = if (errmsg != null) std.mem.span(errmsg) else 
std.mem.span(c.sqlite3_errmsg(db)); - sqlite_mod.fatalSqlWithContext(allocator, db, msg, stderr_writer); - } - if (errmsg != null) c.sqlite3_free(errmsg); - } - - return rows_inserted; -} - /// runColumns(allocator, io, args, stderr_writer, stdout_writer) → void /// Pre: args is valid; allocator and writers are valid /// Post: column names from the input header (CSV/JSON/NDJSON) are written to stdout, From f8dc1c33ba1992d560c36572a8bea1d1ef6a290b Mon Sep 17 00:00:00 2001 From: "Victor M. Varela" Date: Fri, 8 May 2026 13:25:05 +0200 Subject: [PATCH 5/6] refactor: extract runColumns, runValidate, runSample into src/modes/ --- src/main.zig | 653 +---------------------------------------- src/modes/columns.zig | 204 +++++++++++++ src/modes/sample.zig | 167 +++++++++++ src/modes/validate.zig | 323 ++++++++++++++++++++ 4 files changed, 701 insertions(+), 646 deletions(-) create mode 100644 src/modes/columns.zig create mode 100644 src/modes/sample.zig create mode 100644 src/modes/validate.zig diff --git a/src/main.zig b/src/main.zig index 78d1bd4..2bfd761 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,6 +1,5 @@ const std = @import("std"); const c = @import("c"); -const csv = @import("csv.zig"); const json = @import("json.zig"); const xml = @import("xml.zig"); const format = @import("format.zig"); @@ -9,26 +8,19 @@ const args_mod = @import("args.zig"); const sqlite_mod = @import("sqlite.zig"); const loader = @import("loader.zig"); -const ColumnType = sqlite_mod.ColumnType; +const columns_mode = @import("modes/columns.zig"); +const validate_mode = @import("modes/validate.zig"); +const sample_mode = @import("modes/sample.zig"); const VERSION: []const u8 = build_options.version; const SqlPipeError = args_mod.SqlPipeError; const ParsedArgs = args_mod.ParsedArgs; -const ColumnsArgs = args_mod.ColumnsArgs; -const ValidateArgs = args_mod.ValidateArgs; -const SampleArgs = args_mod.SampleArgs; -const ArgsResult = args_mod.ArgsResult; const parseArgs = args_mod.parseArgs; const 
printUsage = args_mod.printUsage; -const inferTypes = loader.inferTypes; -const parseHeader = loader.parseHeader; -const insertRowTyped = loader.insertRowTyped; -const fmtThousands = loader.fmtThousands; -const printProgress = loader.printProgress; const loadCsvInput = loader.loadCsvInput; -const inference_buffer_size = loader.inference_buffer_size; +const fmtThousands = loader.fmtThousands; const progress_interval = loader.progress_interval; /// Structured exit codes for scripting. @@ -104,637 +96,6 @@ fn fatal(comptime fmt: []const u8, writer: *std.Io.Writer, code: ExitCode, args: std.process.exit(@intFromEnum(code)); } -/// runColumns(allocator, io, args, stderr_writer, stdout_writer) → void -/// Pre: args is valid; allocator and writers are valid -/// Post: column names from the input header (CSV/JSON/NDJSON) are written to stdout, -/// one per line; when args.verbose is true each line has format " " -/// (CSV only — JSON/NDJSON always show TEXT); exits 0 on success, 2 on parse error -fn runColumns( - allocator: std.mem.Allocator, - io: std.Io, - args: ColumnsArgs, - stderr_writer: *std.Io.Writer, - stdout_writer: *std.Io.Writer, -) void { - switch (args.input_format) { - .csv, .tsv => { - const col_delim: []const u8 = if (args.input_format == .tsv) "\t" else args.delimiter; - var stdin_buf: [4096]u8 = undefined; - var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); - var csv_reader = csv.csvReaderWithDelimiter(allocator, &stdin_file_reader.interface, col_delim); - - const header_record = csv_reader.nextRecord() catch |err| switch (err) { - error.UnterminatedQuotedField => fatal("row 1: unterminated quoted field", stderr_writer, .csv_error, .{}), - else => fatal("row 1: failed to parse CSV header", stderr_writer, .csv_error, .{}), - } orelse fatal("empty input (no header row)", stderr_writer, .csv_error, .{}); - defer csv_reader.freeRecord(header_record); - - const cols = parseHeader(allocator, header_record, stderr_writer) catch 
|err| switch (err) { - error.EmptyColumnName => fatal("row 1: empty column name in header", stderr_writer, .csv_error, .{}), - error.NoColumns => fatal("row 1: no columns found in header", stderr_writer, .csv_error, .{}), - else => fatal("row 1: failed to parse header", stderr_writer, .csv_error, .{}), - }; - defer { - for (cols) |col| allocator.free(col); - allocator.free(cols); - } - - if (args.verbose) { - var row_buffer: std.ArrayList([][]u8) = .empty; - defer { - for (row_buffer.items) |row| csv_reader.freeRecord(row); - row_buffer.deinit(allocator); - } - var data_row: usize = 1; - while (row_buffer.items.len < inference_buffer_size) { - data_row += 1; - const rec = csv_reader.nextRecord() catch |err| switch (err) { - error.UnterminatedQuotedField => fatal( - "row {d}: unterminated quoted field", - stderr_writer, - .csv_error, - .{data_row}, - ), - else => fatal("row {d}: failed to parse CSV", stderr_writer, .csv_error, .{data_row}), - } orelse break; - if (rec.len == 0) { - csv_reader.freeRecord(rec); - continue; - } - row_buffer.append(allocator, rec) catch - fatal("out of memory while buffering rows", stderr_writer, .csv_error, .{}); - } - const types = inferTypes(allocator, row_buffer.items, cols.len) catch - fatal("out of memory during type inference", stderr_writer, .csv_error, .{}); - defer allocator.free(types); - for (cols, types) |col, t| { - stdout_writer.print("{s} {s}\n", .{ col, @tagName(t) }) catch |err| { - std.log.err("failed to write output: {}", .{err}); - }; - } - } else { - for (cols) |col| { - stdout_writer.print("{s}\n", .{col}) catch |err| { - std.log.err("failed to write output: {}", .{err}); - }; - } - } - }, - .json => { - var stdin_buf: [4096]u8 = undefined; - var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); - - var buf: std.ArrayList(u8) = .empty; - defer buf.deinit(allocator); - while (true) { - const byte = stdin_file_reader.interface.takeByte() catch |err| switch (err) { - error.EndOfStream => 
break, - error.ReadFailed => fatal("failed to read JSON input", stderr_writer, .csv_error, .{}), - }; - buf.append(allocator, byte) catch fatal("out of memory reading JSON", stderr_writer, .csv_error, .{}); - } - if (buf.items.len == 0) fatal("empty input", stderr_writer, .csv_error, .{}); - - var parsed = std.json.parseFromSlice(std.json.Value, allocator, buf.items, .{}) catch - fatal("failed to parse JSON input", stderr_writer, .csv_error, .{}); - defer parsed.deinit(); - - const array = switch (parsed.value) { - .array => |a| a, - else => fatal("JSON input must be an array of objects", stderr_writer, .csv_error, .{}), - }; - if (array.items.len == 0) fatal("empty JSON array: cannot determine column names", stderr_writer, .csv_error, .{}); - - const first_obj = switch (array.items[0]) { - .object => |o| o, - else => fatal("JSON array elements must be objects", stderr_writer, .csv_error, .{}), - }; - - var ki = first_obj.iterator(); - while (ki.next()) |entry| { - if (args.verbose) { - stdout_writer.print("{s} TEXT\n", .{entry.key_ptr.*}) catch |err| { - std.log.err("failed to write output: {}", .{err}); - }; - } else { - stdout_writer.print("{s}\n", .{entry.key_ptr.*}) catch |err| { - std.log.err("failed to write output: {}", .{err}); - }; - } - } - }, - .ndjson => { - var stdin_buf: [4096]u8 = undefined; - var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); - - // Read until we find a non-empty line - var line_num: usize = 0; - while (true) { - line_num += 1; - const line = json.readLine(allocator, &stdin_file_reader.interface) catch |err| switch (err) { - error.OutOfMemory => fatal("out of memory reading NDJSON", stderr_writer, .csv_error, .{}), - error.ReadFailed => fatal("line {d}: failed to read NDJSON", stderr_writer, .csv_error, .{line_num}), - } orelse fatal("empty NDJSON input", stderr_writer, .csv_error, .{}); - defer allocator.free(line); - - const trimmed = std.mem.trim(u8, line, " \t\r"); - if (trimmed.len == 0) { 
line_num -= 1; continue; } - - var parsed = std.json.parseFromSlice(std.json.Value, allocator, trimmed, .{}) catch - fatal("line 1: failed to parse NDJSON", stderr_writer, .csv_error, .{}); - defer parsed.deinit(); - - const obj = switch (parsed.value) { - .object => |o| o, - else => fatal("line 1: NDJSON element must be a JSON object", stderr_writer, .csv_error, .{}), - }; - - var ki = obj.iterator(); - while (ki.next()) |entry| { - if (args.verbose) { - stdout_writer.print("{s} TEXT\n", .{entry.key_ptr.*}) catch |err| { - std.log.err("failed to write output: {}", .{err}); - }; - } else { - stdout_writer.print("{s}\n", .{entry.key_ptr.*}) catch |err| { - std.log.err("failed to write output: {}", .{err}); - }; - } - } - break; - } - }, - .xml => { - var stdin_buf: [4096]u8 = undefined; - var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); - - const names = xml.getXmlColumnNames(allocator, &stdin_file_reader.interface, args.xml_root_input, args.xml_row_input, stderr_writer); - defer { - for (names) |name| allocator.free(name); - allocator.free(names); - } - for (names) |name| { - if (args.verbose) { - stdout_writer.print("{s} TEXT\n", .{name}) catch |err| { - std.log.err("failed to write output: {}", .{err}); - }; - } else { - stdout_writer.print("{s}\n", .{name}) catch |err| { - std.log.err("failed to write output: {}", .{err}); - }; - } - } - }, - } -} - -/// runValidate(allocator, io, args, stderr_writer, stdout_writer) → void -/// Pre: args is valid; allocator and writers are valid -/// Post: the entire input has been parsed (CSV, TSV, JSON, or NDJSON); -/// on success prints "OK: rows, columns ( , ...)" to stdout. -/// On parse error, prints the error message to stderr and exits 2. 
-fn runValidate( - allocator: std.mem.Allocator, - io: std.Io, - args: ValidateArgs, - stderr_writer: *std.Io.Writer, - stdout_writer: *std.Io.Writer, -) void { - switch (args.input_format) { - .csv, .tsv => { - const col_delim: []const u8 = if (args.input_format == .tsv) "\t" else args.delimiter; - var stdin_buf: [4096]u8 = undefined; - var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); - var csv_reader = csv.csvReaderWithDelimiter(allocator, &stdin_file_reader.interface, col_delim); - - const header_record = csv_reader.nextRecord() catch |err| switch (err) { - error.UnterminatedQuotedField => fatal("row 1: unterminated quoted field", stderr_writer, .csv_error, .{}), - else => fatal("row 1: failed to parse CSV header", stderr_writer, .csv_error, .{}), - } orelse fatal("empty input (no header row)", stderr_writer, .csv_error, .{}); - defer csv_reader.freeRecord(header_record); - - const cols = parseHeader(allocator, header_record, stderr_writer) catch |err| switch (err) { - error.EmptyColumnName => fatal("row 1: empty column name in header", stderr_writer, .csv_error, .{}), - error.NoColumns => fatal("row 1: no columns found in header", stderr_writer, .csv_error, .{}), - else => fatal("row 1: failed to parse header", stderr_writer, .csv_error, .{}), - }; - defer { - for (cols) |col| allocator.free(col); - allocator.free(cols); - } - - const num_cols = cols.len; - var csv_row_count: usize = 1; // header already read - var data_row_count: usize = 0; - - var row_buffer: std.ArrayList([][]u8) = .empty; - defer { - for (row_buffer.items) |row| csv_reader.freeRecord(row); - row_buffer.deinit(allocator); - } - - // Buffer up to inference_buffer_size rows for type inference - while (row_buffer.items.len < inference_buffer_size) { - const rec = csv_reader.nextRecord() catch |err| switch (err) { - error.UnterminatedQuotedField => fatal( - "row {d}: unterminated quoted field", - stderr_writer, - .csv_error, - .{csv_row_count + 1}, - ), - else => 
fatal( - "row {d}: failed to parse CSV", - stderr_writer, - .csv_error, - .{csv_row_count + 1}, - ), - } orelse break; - csv_row_count += 1; - if (rec.len == 0) { - csv_reader.freeRecord(rec); - continue; - } - data_row_count += 1; - row_buffer.append(allocator, rec) catch - fatal("out of memory while buffering rows", stderr_writer, .csv_error, .{}); - } - - const types: []ColumnType = if (args.type_inference) blk: { - break :blk inferTypes(allocator, row_buffer.items, num_cols) catch - fatal("out of memory during type inference", stderr_writer, .csv_error, .{}); - } else blk: { - const t = allocator.alloc(ColumnType, num_cols) catch - fatal("out of memory", stderr_writer, .csv_error, .{}); - @memset(t, .TEXT); - break :blk t; - }; - defer allocator.free(types); - - // Stream remaining rows and count them - while (true) { - const record = csv_reader.nextRecord() catch |err| switch (err) { - error.UnterminatedQuotedField => fatal( - "row {d}: unterminated quoted field", - stderr_writer, - .csv_error, - .{csv_row_count + 1}, - ), - else => fatal( - "row {d}: failed to parse CSV", - stderr_writer, - .csv_error, - .{csv_row_count + 1}, - ), - } orelse break; - csv_row_count += 1; - defer csv_reader.freeRecord(record); - if (record.len == 0) continue; - data_row_count += 1; - } - - var count_buf: [32]u8 = undefined; - const count_str = fmtThousands(&count_buf, data_row_count); - - stdout_writer.print("OK: {s} rows, {d} columns (", .{ count_str, num_cols }) catch |err| { - std.log.err("failed to write output: {}", .{err}); - std.process.exit(@intFromEnum(ExitCode.usage)); - }; - - for (cols, types, 0..) 
|col, t, i| { - if (i > 0) { - stdout_writer.writeAll(", ") catch |err| { - std.log.err("failed to write output: {}", .{err}); - std.process.exit(@intFromEnum(ExitCode.usage)); - }; - } - stdout_writer.print("{s} {s}", .{ col, @tagName(t) }) catch |err| { - std.log.err("failed to write output: {}", .{err}); - std.process.exit(@intFromEnum(ExitCode.usage)); - }; - } - stdout_writer.writeAll(")\n") catch |err| { - std.log.err("failed to write output: {}", .{err}); - std.process.exit(@intFromEnum(ExitCode.usage)); - }; - }, - .json => { - var stdin_buf: [4096]u8 = undefined; - var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); - - var buf: std.ArrayList(u8) = .empty; - defer buf.deinit(allocator); - while (true) { - const byte = stdin_file_reader.interface.takeByte() catch |err| switch (err) { - error.EndOfStream => break, - error.ReadFailed => fatal("failed to read JSON input", stderr_writer, .csv_error, .{}), - }; - buf.append(allocator, byte) catch fatal("out of memory reading JSON", stderr_writer, .csv_error, .{}); - } - if (buf.items.len == 0) fatal("empty input", stderr_writer, .csv_error, .{}); - - var parsed = std.json.parseFromSlice(std.json.Value, allocator, buf.items, .{}) catch - fatal("failed to parse JSON input", stderr_writer, .csv_error, .{}); - defer parsed.deinit(); - - const array = switch (parsed.value) { - .array => |a| a, - else => fatal("JSON input must be an array of objects", stderr_writer, .csv_error, .{}), - }; - if (array.items.len == 0) fatal("empty JSON array: cannot determine column names", stderr_writer, .csv_error, .{}); - - const first_obj = switch (array.items[0]) { - .object => |o| o, - else => fatal("JSON array elements must be objects", stderr_writer, .csv_error, .{}), - }; - - var num_cols: usize = 0; - var ki = first_obj.iterator(); - while (ki.next()) |_| num_cols += 1; - - var count_buf: [32]u8 = undefined; - const count_str = fmtThousands(&count_buf, array.items.len); - stdout_writer.print("OK: 
{s} rows, {d} columns (", .{ count_str, num_cols }) catch |err| { - std.log.err("failed to write output: {}", .{err}); - std.process.exit(@intFromEnum(ExitCode.usage)); - }; - ki = first_obj.iterator(); - var col_i: usize = 0; - while (ki.next()) |entry| : (col_i += 1) { - if (col_i > 0) stdout_writer.writeAll(", ") catch |err| { - std.log.err("failed to write output: {}", .{err}); - std.process.exit(@intFromEnum(ExitCode.usage)); - }; - stdout_writer.print("{s} TEXT", .{entry.key_ptr.*}) catch |err| { - std.log.err("failed to write output: {}", .{err}); - std.process.exit(@intFromEnum(ExitCode.usage)); - }; - } - stdout_writer.writeAll(")\n") catch |err| { - std.log.err("failed to write output: {}", .{err}); - std.process.exit(@intFromEnum(ExitCode.usage)); - }; - }, - .ndjson => { - var stdin_buf: [4096]u8 = undefined; - var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); - - var line_num: usize = 0; - var row_count: usize = 0; - var cols_owned: ?[][]u8 = null; - defer if (cols_owned) |cs| { - for (cs) |col| allocator.free(col); - allocator.free(cs); - }; - - while (true) { - line_num += 1; - const line = json.readLine(allocator, &stdin_file_reader.interface) catch |err| switch (err) { - error.OutOfMemory => fatal("out of memory reading NDJSON", stderr_writer, .csv_error, .{}), - error.ReadFailed => fatal("line {d}: failed to read NDJSON", stderr_writer, .csv_error, .{line_num}), - } orelse break; - defer allocator.free(line); - - const trimmed = std.mem.trim(u8, line, " \t\r"); - if (trimmed.len == 0) { - line_num -= 1; - continue; - } - - var parsed_line = std.json.parseFromSlice(std.json.Value, allocator, trimmed, .{}) catch - fatal("line {d}: failed to parse NDJSON", stderr_writer, .csv_error, .{line_num}); - defer parsed_line.deinit(); - - const obj = switch (parsed_line.value) { - .object => |o| o, - else => fatal("line {d}: NDJSON element must be a JSON object", stderr_writer, .csv_error, .{line_num}), - }; - - if (cols_owned 
== null) { - var col_list: std.ArrayList([]u8) = .empty; - errdefer { - for (col_list.items) |col| allocator.free(col); - col_list.deinit(allocator); - } - var ki = obj.iterator(); - while (ki.next()) |entry| { - const owned_key = allocator.dupe(u8, entry.key_ptr.*) catch - fatal("out of memory building column list", stderr_writer, .csv_error, .{}); - col_list.append(allocator, owned_key) catch - fatal("out of memory building column list", stderr_writer, .csv_error, .{}); - } - if (col_list.items.len == 0) - fatal("line 1: first NDJSON object has no keys", stderr_writer, .csv_error, .{}); - cols_owned = col_list.toOwnedSlice(allocator) catch - fatal("out of memory", stderr_writer, .csv_error, .{}); - } - row_count += 1; - } - - if (cols_owned == null) fatal("empty NDJSON input", stderr_writer, .csv_error, .{}); - - const cols = cols_owned.?; - var count_buf: [32]u8 = undefined; - const count_str = fmtThousands(&count_buf, row_count); - stdout_writer.print("OK: {s} rows, {d} columns (", .{ count_str, cols.len }) catch |err| { - std.log.err("failed to write output: {}", .{err}); - std.process.exit(@intFromEnum(ExitCode.usage)); - }; - for (cols, 0..) 
|col, i| { - if (i > 0) stdout_writer.writeAll(", ") catch |err| { - std.log.err("failed to write output: {}", .{err}); - std.process.exit(@intFromEnum(ExitCode.usage)); - }; - stdout_writer.print("{s} TEXT", .{col}) catch |err| { - std.log.err("failed to write output: {}", .{err}); - std.process.exit(@intFromEnum(ExitCode.usage)); - }; - } - stdout_writer.writeAll(")\n") catch |err| { - std.log.err("failed to write output: {}", .{err}); - std.process.exit(@intFromEnum(ExitCode.usage)); - }; - }, - .xml => { - var stdin_buf: [4096]u8 = undefined; - var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); - - const summary = xml.summarizeXml(allocator, &stdin_file_reader.interface, args.xml_root_input, args.xml_row_input, stderr_writer); - defer { - for (summary.col_names) |name| allocator.free(name); - allocator.free(summary.col_names); - } - - var count_buf: [32]u8 = undefined; - const count_str = fmtThousands(&count_buf, summary.row_count); - stdout_writer.print("OK: {s} rows, {d} columns (", .{ count_str, summary.col_names.len }) catch |err| { - std.log.err("failed to write output: {}", .{err}); - std.process.exit(@intFromEnum(ExitCode.usage)); - }; - for (summary.col_names, 0..) 
|name, i| { - if (i > 0) stdout_writer.writeAll(", ") catch |err| { - std.log.err("failed to write output: {}", .{err}); - std.process.exit(@intFromEnum(ExitCode.usage)); - }; - stdout_writer.print("{s} TEXT", .{name}) catch |err| { - std.log.err("failed to write output: {}", .{err}); - std.process.exit(@intFromEnum(ExitCode.usage)); - }; - } - stdout_writer.writeAll(")\n") catch |err| { - std.log.err("failed to write output: {}", .{err}); - std.process.exit(@intFromEnum(ExitCode.usage)); - }; - }, - } -} - -/// runSample(allocator, io, args, stderr_writer, stdout_writer) → void -/// Pre: args is valid; allocator and writers are valid; input_format is csv or tsv -/// Post: a schema comment block is written to stderr (column names + inferred types, -/// or all TEXT if args.type_inference is false, each line prefixed with "#") and -/// a header row + first args.n data rows are written to stdout as delimited text. -/// Exits 2 on parse error, 1 on stdout write error. No query required. -fn runSample( - allocator: std.mem.Allocator, - io: std.Io, - args: SampleArgs, - stderr_writer: *std.Io.Writer, - stdout_writer: *std.Io.Writer, -) void { - switch (args.input_format) { - .json, .ndjson, .xml => fatal( - "--sample is only supported with CSV and TSV input", - stderr_writer, - .usage, - .{}, - ), - .csv, .tsv => { - const col_delim: []const u8 = if (args.input_format == .tsv) "\t" else args.delimiter; - var stdin_buf: [4096]u8 = undefined; - var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); - var csv_reader = csv.csvReaderWithDelimiter(allocator, &stdin_file_reader.interface, col_delim); - - const header_record = csv_reader.nextRecord() catch |err| switch (err) { - error.UnterminatedQuotedField => fatal("row 1: unterminated quoted field", stderr_writer, .csv_error, .{}), - else => fatal("row 1: failed to parse CSV header", stderr_writer, .csv_error, .{}), - } orelse fatal("empty input (no header row)", stderr_writer, .csv_error, .{}); - 
defer csv_reader.freeRecord(header_record); - - const cols = parseHeader(allocator, header_record, stderr_writer) catch |err| switch (err) { - error.EmptyColumnName => fatal("row 1: empty column name in header", stderr_writer, .csv_error, .{}), - error.NoColumns => fatal("row 1: no columns found in header", stderr_writer, .csv_error, .{}), - else => fatal("row 1: failed to parse header", stderr_writer, .csv_error, .{}), - }; - defer { - for (cols) |col| allocator.free(col); - allocator.free(cols); - } - - // Buffer max(inference_buffer_size, n) rows for type inference - const buf_size = @max(inference_buffer_size, args.n); - var row_buffer: std.ArrayList([][]u8) = .empty; - defer { - for (row_buffer.items) |row| csv_reader.freeRecord(row); - row_buffer.deinit(allocator); - } - - var csv_row_count: usize = 1; - // Loop invariant I: row_buffer contains all non-empty data rows read so far (up to buf_size) - // Bounding function: buf_size - row_buffer.items.len - while (row_buffer.items.len < buf_size) { - const rec = csv_reader.nextRecord() catch |err| switch (err) { - error.UnterminatedQuotedField => fatal( - "row {d}: unterminated quoted field", - stderr_writer, - .csv_error, - .{csv_row_count + 1}, - ), - else => fatal("row {d}: failed to parse CSV", stderr_writer, .csv_error, .{csv_row_count + 1}), - } orelse break; - csv_row_count += 1; - if (rec.len == 0) { - csv_reader.freeRecord(rec); - continue; - } - row_buffer.append(allocator, rec) catch - fatal("out of memory while buffering rows", stderr_writer, .csv_error, .{}); - } - - const types: []ColumnType = if (args.type_inference) blk: { - break :blk inferTypes(allocator, row_buffer.items, cols.len) catch - fatal("out of memory during type inference", stderr_writer, .csv_error, .{}); - } else blk: { - const t = allocator.alloc(ColumnType, cols.len) catch - fatal("out of memory", stderr_writer, .csv_error, .{}); - @memset(t, .TEXT); - break :blk t; - }; - defer allocator.free(types); - - // ─── Print schema block 
to stderr ───────────────────────────────────── - // Compute max column name width for aligned output - var max_col_width: usize = 0; - for (cols) |col| max_col_width = @max(max_col_width, col.len); - - stderr_writer.print("# Schema ({d} columns):\n", .{cols.len}) catch |err| { - std.log.err("failed to write schema: {}", .{err}); - }; - // Loop invariant I: cols[0..i] have been printed with aligned type annotation - // Bounding function: cols.len - i - for (cols, types) |col, t| { - stderr_writer.writeAll("# ") catch |err| { - std.log.err("failed to write schema: {}", .{err}); - }; - stderr_writer.writeAll(col) catch |err| { - std.log.err("failed to write schema: {}", .{err}); - }; - // Pad to max_col_width + 2 spaces before the type - var p: usize = col.len; - while (p < max_col_width + 2) : (p += 1) { - stderr_writer.writeByte(' ') catch |err| { - std.log.err("failed to write schema: {}", .{err}); - }; - } - stderr_writer.print("{s}\n", .{@tagName(t)}) catch |err| { - std.log.err("failed to write schema: {}", .{err}); - }; - } - stderr_writer.flush() catch |err| std.log.err("failed to flush stderr: {}", .{err}); - - // ─── Print header row to stdout ──────────────────────────────────────── - // Loop invariant I: cols[0..i] names have been written, separated by col_delim - // Bounding function: cols.len - i - for (cols, 0..) 
|col, i| { - if (i > 0) stdout_writer.writeAll(col_delim) catch - fatal("failed to write header", stderr_writer, .csv_error, .{}); - format.writeField(stdout_writer, col, col_delim) catch - fatal("failed to write header", stderr_writer, .csv_error, .{}); - } - stdout_writer.writeByte('\n') catch - fatal("failed to write header newline", stderr_writer, .csv_error, .{}); - - // ─── Print first n data rows to stdout ──────────────────────────────── - const rows_to_print = @min(args.n, row_buffer.items.len); - // Loop invariant I: row_buffer[0..r] have been printed as delimited rows - // Bounding function: rows_to_print - r - for (row_buffer.items[0..rows_to_print]) |row| { - var col_idx: usize = 0; - // Loop invariant I: cols[0..col_idx] fields have been written for this row - // Bounding function: cols.len - col_idx - while (col_idx < cols.len) : (col_idx += 1) { - if (col_idx > 0) stdout_writer.writeAll(col_delim) catch - fatal("failed to write field separator", stderr_writer, .csv_error, .{}); - const val: []const u8 = if (col_idx < row.len) row[col_idx] else ""; - format.writeField(stdout_writer, val, col_delim) catch - fatal("failed to write field", stderr_writer, .csv_error, .{}); - } - stdout_writer.writeByte('\n') catch - fatal("failed to write row newline", stderr_writer, .csv_error, .{}); - } - }, - } -} - /// run(allocator, io, parsed, stderr_writer, stdout_writer) → void /// Pre: parsed contains a valid query; allocator and writers are valid /// Post: input from stdin has been loaded (dispatched on parsed.input_format), @@ -986,7 +347,7 @@ pub fn main(init: std.process.Init.Minimal) void { std.process.exit(@intFromEnum(ExitCode.success)); }, .columns => |col_args| { - runColumns(allocator, io.io(), col_args, stderr_writer, stdout_writer); + columns_mode.runColumns(allocator, io.io(), col_args, stderr_writer, stdout_writer); stdout_file_writer.flush() catch |err| { std.log.err("failed to flush stdout: {}", .{err}); }; @@ -995,7 +356,7 @@ pub fn main(init: 
std.process.Init.Minimal) void { }; }, .validate => |val_args| { - runValidate(allocator, io.io(), val_args, stderr_writer, stdout_writer); + validate_mode.runValidate(allocator, io.io(), val_args, stderr_writer, stdout_writer); stdout_file_writer.flush() catch |err| { std.log.err("failed to flush stdout: {}", .{err}); }; @@ -1004,7 +365,7 @@ pub fn main(init: std.process.Init.Minimal) void { }; }, .sample => |sample_args| { - runSample(allocator, io.io(), sample_args, stderr_writer, stdout_writer); + sample_mode.runSample(allocator, io.io(), sample_args, stderr_writer, stdout_writer); stdout_file_writer.flush() catch |err| { std.log.err("failed to flush stdout: {}", .{err}); }; diff --git a/src/modes/columns.zig b/src/modes/columns.zig new file mode 100644 index 0000000..2d3e199 --- /dev/null +++ b/src/modes/columns.zig @@ -0,0 +1,204 @@ +const std = @import("std"); +const csv_mod = @import("../csv.zig"); +const json_mod = @import("../json.zig"); +const xml_mod = @import("../xml.zig"); +const loader = @import("../loader.zig"); +const args_mod = @import("../args.zig"); + +const inferTypes = loader.inferTypes; +const parseHeader = loader.parseHeader; +const inference_buffer_size = loader.inference_buffer_size; + +const ExitCode = enum(u8) { + success = 0, + usage = 1, + csv_error = 2, + sql_error = 3, +}; + +fn fatal(comptime fmt: []const u8, writer: *std.Io.Writer, code: ExitCode, f_args: anytype) noreturn { + writer.print("error: " ++ fmt ++ "\n", f_args) catch |err| { + std.log.err("failed to write error message: {}", .{err}); + }; + writer.flush() catch |err| std.log.err("failed to flush: {}", .{err}); + std.process.exit(@intFromEnum(code)); +} + +pub fn runColumns( + allocator: std.mem.Allocator, + io: std.Io, + args: args_mod.ColumnsArgs, + stderr_writer: *std.Io.Writer, + stdout_writer: *std.Io.Writer, +) void { + switch (args.input_format) { + .csv, .tsv => { + const col_delim: []const u8 = if (args.input_format == .tsv) "\t" else args.delimiter; + var 
stdin_buf: [4096]u8 = undefined; + var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); + var csv_reader = csv_mod.csvReaderWithDelimiter(allocator, &stdin_file_reader.interface, col_delim); + + const header_record = csv_reader.nextRecord() catch |err| switch (err) { + error.UnterminatedQuotedField => fatal("row 1: unterminated quoted field", stderr_writer, .csv_error, .{}), + else => fatal("row 1: failed to parse CSV header", stderr_writer, .csv_error, .{}), + } orelse fatal("empty input (no header row)", stderr_writer, .csv_error, .{}); + defer csv_reader.freeRecord(header_record); + + const cols = parseHeader(allocator, header_record, stderr_writer) catch |err| switch (err) { + error.EmptyColumnName => fatal("row 1: empty column name in header", stderr_writer, .csv_error, .{}), + error.NoColumns => fatal("row 1: no columns found in header", stderr_writer, .csv_error, .{}), + else => fatal("row 1: failed to parse header", stderr_writer, .csv_error, .{}), + }; + defer { + for (cols) |col| allocator.free(col); + allocator.free(cols); + } + + if (args.verbose) { + var row_buffer: std.ArrayList([][]u8) = .empty; + defer { + for (row_buffer.items) |row| csv_reader.freeRecord(row); + row_buffer.deinit(allocator); + } + var data_row: usize = 1; + while (row_buffer.items.len < inference_buffer_size) { + data_row += 1; + const rec = csv_reader.nextRecord() catch |err| switch (err) { + error.UnterminatedQuotedField => fatal( + "row {d}: unterminated quoted field", + stderr_writer, + .csv_error, + .{data_row}, + ), + else => fatal("row {d}: failed to parse CSV", stderr_writer, .csv_error, .{data_row}), + } orelse break; + if (rec.len == 0) { + csv_reader.freeRecord(rec); + continue; + } + row_buffer.append(allocator, rec) catch + fatal("out of memory while buffering rows", stderr_writer, .csv_error, .{}); + } + const types = inferTypes(allocator, row_buffer.items, cols.len) catch + fatal("out of memory during type inference", stderr_writer, 
.csv_error, .{}); + defer allocator.free(types); + for (cols, types) |col, t| { + stdout_writer.print("{s} {s}\n", .{ col, @tagName(t) }) catch |err| { + std.log.err("failed to write output: {}", .{err}); + }; + } + } else { + for (cols) |col| { + stdout_writer.print("{s}\n", .{col}) catch |err| { + std.log.err("failed to write output: {}", .{err}); + }; + } + } + }, + .json => { + var stdin_buf: [4096]u8 = undefined; + var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); + + var buf: std.ArrayList(u8) = .empty; + defer buf.deinit(allocator); + while (true) { + const byte = stdin_file_reader.interface.takeByte() catch |err| switch (err) { + error.EndOfStream => break, + error.ReadFailed => fatal("failed to read JSON input", stderr_writer, .csv_error, .{}), + }; + buf.append(allocator, byte) catch fatal("out of memory reading JSON", stderr_writer, .csv_error, .{}); + } + if (buf.items.len == 0) fatal("empty input", stderr_writer, .csv_error, .{}); + + var parsed = std.json.parseFromSlice(std.json.Value, allocator, buf.items, .{}) catch + fatal("failed to parse JSON input", stderr_writer, .csv_error, .{}); + defer parsed.deinit(); + + const array = switch (parsed.value) { + .array => |a| a, + else => fatal("JSON input must be an array of objects", stderr_writer, .csv_error, .{}), + }; + if (array.items.len == 0) fatal("empty JSON array: cannot determine column names", stderr_writer, .csv_error, .{}); + + const first_obj = switch (array.items[0]) { + .object => |o| o, + else => fatal("JSON array elements must be objects", stderr_writer, .csv_error, .{}), + }; + + var ki = first_obj.iterator(); + while (ki.next()) |entry| { + if (args.verbose) { + stdout_writer.print("{s} TEXT\n", .{entry.key_ptr.*}) catch |err| { + std.log.err("failed to write output: {}", .{err}); + }; + } else { + stdout_writer.print("{s}\n", .{entry.key_ptr.*}) catch |err| { + std.log.err("failed to write output: {}", .{err}); + }; + } + } + }, + .ndjson => { + var 
stdin_buf: [4096]u8 = undefined; + var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); + + // Read until we find a non-empty line + var line_num: usize = 0; + while (true) { + line_num += 1; + const line = json_mod.readLine(allocator, &stdin_file_reader.interface) catch |err| switch (err) { + error.OutOfMemory => fatal("out of memory reading NDJSON", stderr_writer, .csv_error, .{}), + error.ReadFailed => fatal("line {d}: failed to read NDJSON", stderr_writer, .csv_error, .{line_num}), + } orelse fatal("empty NDJSON input", stderr_writer, .csv_error, .{}); + defer allocator.free(line); + + const trimmed = std.mem.trim(u8, line, " \t\r"); + if (trimmed.len == 0) { line_num -= 1; continue; } + + var parsed = std.json.parseFromSlice(std.json.Value, allocator, trimmed, .{}) catch + fatal("line 1: failed to parse NDJSON", stderr_writer, .csv_error, .{}); + defer parsed.deinit(); + + const obj = switch (parsed.value) { + .object => |o| o, + else => fatal("line 1: NDJSON element must be a JSON object", stderr_writer, .csv_error, .{}), + }; + + var ki = obj.iterator(); + while (ki.next()) |entry| { + if (args.verbose) { + stdout_writer.print("{s} TEXT\n", .{entry.key_ptr.*}) catch |err| { + std.log.err("failed to write output: {}", .{err}); + }; + } else { + stdout_writer.print("{s}\n", .{entry.key_ptr.*}) catch |err| { + std.log.err("failed to write output: {}", .{err}); + }; + } + } + break; + } + }, + .xml => { + var stdin_buf: [4096]u8 = undefined; + var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); + + const names = xml_mod.getXmlColumnNames(allocator, &stdin_file_reader.interface, args.xml_root_input, args.xml_row_input, stderr_writer); + defer { + for (names) |name| allocator.free(name); + allocator.free(names); + } + for (names) |name| { + if (args.verbose) { + stdout_writer.print("{s} TEXT\n", .{name}) catch |err| { + std.log.err("failed to write output: {}", .{err}); + }; + } else { + 
stdout_writer.print("{s}\n", .{name}) catch |err| { + std.log.err("failed to write output: {}", .{err}); + }; + } + } + }, + } +} diff --git a/src/modes/sample.zig b/src/modes/sample.zig new file mode 100644 index 0000000..084a270 --- /dev/null +++ b/src/modes/sample.zig @@ -0,0 +1,167 @@ +const std = @import("std"); +const csv_mod = @import("../csv.zig"); +const sqlite_mod = @import("../sqlite.zig"); +const loader = @import("../loader.zig"); +const args_mod = @import("../args.zig"); +const format = @import("../format.zig"); + +const ColumnType = sqlite_mod.ColumnType; +const inferTypes = loader.inferTypes; +const parseHeader = loader.parseHeader; +const inference_buffer_size = loader.inference_buffer_size; + +const ExitCode = enum(u8) { + success = 0, + usage = 1, + csv_error = 2, + sql_error = 3, +}; + +fn fatal(comptime fmt: []const u8, writer: *std.Io.Writer, code: ExitCode, f_args: anytype) noreturn { + writer.print("error: " ++ fmt ++ "\n", f_args) catch |err| { + std.log.err("failed to write error message: {}", .{err}); + }; + writer.flush() catch |err| std.log.err("failed to flush: {}", .{err}); + std.process.exit(@intFromEnum(code)); +} + +pub fn runSample( + allocator: std.mem.Allocator, + io: std.Io, + args: args_mod.SampleArgs, + stderr_writer: *std.Io.Writer, + stdout_writer: *std.Io.Writer, +) void { + switch (args.input_format) { + .json, .ndjson, .xml => fatal( + "--sample is only supported with CSV and TSV input", + stderr_writer, + .usage, + .{}, + ), + .csv, .tsv => { + const col_delim: []const u8 = if (args.input_format == .tsv) "\t" else args.delimiter; + var stdin_buf: [4096]u8 = undefined; + var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); + var csv_reader = csv_mod.csvReaderWithDelimiter(allocator, &stdin_file_reader.interface, col_delim); + + const header_record = csv_reader.nextRecord() catch |err| switch (err) { + error.UnterminatedQuotedField => fatal("row 1: unterminated quoted field", stderr_writer, 
.csv_error, .{}), + else => fatal("row 1: failed to parse CSV header", stderr_writer, .csv_error, .{}), + } orelse fatal("empty input (no header row)", stderr_writer, .csv_error, .{}); + defer csv_reader.freeRecord(header_record); + + const cols = parseHeader(allocator, header_record, stderr_writer) catch |err| switch (err) { + error.EmptyColumnName => fatal("row 1: empty column name in header", stderr_writer, .csv_error, .{}), + error.NoColumns => fatal("row 1: no columns found in header", stderr_writer, .csv_error, .{}), + else => fatal("row 1: failed to parse header", stderr_writer, .csv_error, .{}), + }; + defer { + for (cols) |col| allocator.free(col); + allocator.free(cols); + } + + // Buffer max(inference_buffer_size, n) rows for type inference + const buf_size = @max(inference_buffer_size, args.n); + var row_buffer: std.ArrayList([][]u8) = .empty; + defer { + for (row_buffer.items) |row| csv_reader.freeRecord(row); + row_buffer.deinit(allocator); + } + + var csv_row_count: usize = 1; + // Loop invariant I: row_buffer contains all non-empty data rows read so far (up to buf_size) + // Bounding function: buf_size - row_buffer.items.len + while (row_buffer.items.len < buf_size) { + const rec = csv_reader.nextRecord() catch |err| switch (err) { + error.UnterminatedQuotedField => fatal( + "row {d}: unterminated quoted field", + stderr_writer, + .csv_error, + .{csv_row_count + 1}, + ), + else => fatal("row {d}: failed to parse CSV", stderr_writer, .csv_error, .{csv_row_count + 1}), + } orelse break; + csv_row_count += 1; + if (rec.len == 0) { + csv_reader.freeRecord(rec); + continue; + } + row_buffer.append(allocator, rec) catch + fatal("out of memory while buffering rows", stderr_writer, .csv_error, .{}); + } + + const types: []ColumnType = if (args.type_inference) blk: { + break :blk inferTypes(allocator, row_buffer.items, cols.len) catch + fatal("out of memory during type inference", stderr_writer, .csv_error, .{}); + } else blk: { + const t = 
allocator.alloc(ColumnType, cols.len) catch + fatal("out of memory", stderr_writer, .csv_error, .{}); + @memset(t, .TEXT); + break :blk t; + }; + defer allocator.free(types); + + // ─── Print schema block to stderr ───────────────────────────────────── + // Compute max column name width for aligned output + var max_col_width: usize = 0; + for (cols) |col| max_col_width = @max(max_col_width, col.len); + + stderr_writer.print("# Schema ({d} columns):\n", .{cols.len}) catch |err| { + std.log.err("failed to write schema: {}", .{err}); + }; + // Loop invariant I: cols[0..i] have been printed with aligned type annotation + // Bounding function: cols.len - i + for (cols, types) |col, t| { + stderr_writer.writeAll("# ") catch |err| { + std.log.err("failed to write schema: {}", .{err}); + }; + stderr_writer.writeAll(col) catch |err| { + std.log.err("failed to write schema: {}", .{err}); + }; + // Pad to max_col_width + 2 spaces before the type + var p: usize = col.len; + while (p < max_col_width + 2) : (p += 1) { + stderr_writer.writeByte(' ') catch |err| { + std.log.err("failed to write schema: {}", .{err}); + }; + } + stderr_writer.print("{s}\n", .{@tagName(t)}) catch |err| { + std.log.err("failed to write schema: {}", .{err}); + }; + } + stderr_writer.flush() catch |err| std.log.err("failed to flush stderr: {}", .{err}); + + // ─── Print header row to stdout ──────────────────────────────────────── + // Loop invariant I: cols[0..i] names have been written, separated by col_delim + // Bounding function: cols.len - i + for (cols, 0..) 
|col, i| { + if (i > 0) stdout_writer.writeAll(col_delim) catch + fatal("failed to write header", stderr_writer, .csv_error, .{}); + format.writeField(stdout_writer, col, col_delim) catch + fatal("failed to write header", stderr_writer, .csv_error, .{}); + } + stdout_writer.writeByte('\n') catch + fatal("failed to write header newline", stderr_writer, .csv_error, .{}); + + // ─── Print first n data rows to stdout ──────────────────────────────── + const rows_to_print = @min(args.n, row_buffer.items.len); + // Loop invariant I: row_buffer[0..r] have been printed as delimited rows + // Bounding function: rows_to_print - r + for (row_buffer.items[0..rows_to_print]) |row| { + var col_idx: usize = 0; + // Loop invariant I: cols[0..col_idx] fields have been written for this row + // Bounding function: cols.len - col_idx + while (col_idx < cols.len) : (col_idx += 1) { + if (col_idx > 0) stdout_writer.writeAll(col_delim) catch + fatal("failed to write field separator", stderr_writer, .csv_error, .{}); + const val: []const u8 = if (col_idx < row.len) row[col_idx] else ""; + format.writeField(stdout_writer, val, col_delim) catch + fatal("failed to write field", stderr_writer, .csv_error, .{}); + } + stdout_writer.writeByte('\n') catch + fatal("failed to write row newline", stderr_writer, .csv_error, .{}); + } + }, + } +} diff --git a/src/modes/validate.zig b/src/modes/validate.zig new file mode 100644 index 0000000..6926328 --- /dev/null +++ b/src/modes/validate.zig @@ -0,0 +1,323 @@ +const std = @import("std"); +const csv_mod = @import("../csv.zig"); +const json_mod = @import("../json.zig"); +const xml_mod = @import("../xml.zig"); +const sqlite_mod = @import("../sqlite.zig"); +const loader = @import("../loader.zig"); +const args_mod = @import("../args.zig"); + +const ColumnType = sqlite_mod.ColumnType; +const inferTypes = loader.inferTypes; +const parseHeader = loader.parseHeader; +const fmtThousands = loader.fmtThousands; +const inference_buffer_size = 
loader.inference_buffer_size; + +const ExitCode = enum(u8) { + success = 0, + usage = 1, + csv_error = 2, + sql_error = 3, +}; + +fn fatal(comptime fmt: []const u8, writer: *std.Io.Writer, code: ExitCode, f_args: anytype) noreturn { + writer.print("error: " ++ fmt ++ "\n", f_args) catch |err| { + std.log.err("failed to write error message: {}", .{err}); + }; + writer.flush() catch |err| std.log.err("failed to flush: {}", .{err}); + std.process.exit(@intFromEnum(code)); +} + +pub fn runValidate( + allocator: std.mem.Allocator, + io: std.Io, + args: args_mod.ValidateArgs, + stderr_writer: *std.Io.Writer, + stdout_writer: *std.Io.Writer, +) void { + switch (args.input_format) { + .csv, .tsv => { + const col_delim: []const u8 = if (args.input_format == .tsv) "\t" else args.delimiter; + var stdin_buf: [4096]u8 = undefined; + var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); + var csv_reader = csv_mod.csvReaderWithDelimiter(allocator, &stdin_file_reader.interface, col_delim); + + const header_record = csv_reader.nextRecord() catch |err| switch (err) { + error.UnterminatedQuotedField => fatal("row 1: unterminated quoted field", stderr_writer, .csv_error, .{}), + else => fatal("row 1: failed to parse CSV header", stderr_writer, .csv_error, .{}), + } orelse fatal("empty input (no header row)", stderr_writer, .csv_error, .{}); + defer csv_reader.freeRecord(header_record); + + const cols = parseHeader(allocator, header_record, stderr_writer) catch |err| switch (err) { + error.EmptyColumnName => fatal("row 1: empty column name in header", stderr_writer, .csv_error, .{}), + error.NoColumns => fatal("row 1: no columns found in header", stderr_writer, .csv_error, .{}), + else => fatal("row 1: failed to parse header", stderr_writer, .csv_error, .{}), + }; + defer { + for (cols) |col| allocator.free(col); + allocator.free(cols); + } + + const num_cols = cols.len; + var csv_row_count: usize = 1; // header already read + var data_row_count: usize = 0; + 
+ var row_buffer: std.ArrayList([][]u8) = .empty; + defer { + for (row_buffer.items) |row| csv_reader.freeRecord(row); + row_buffer.deinit(allocator); + } + + // Buffer up to inference_buffer_size rows for type inference + while (row_buffer.items.len < inference_buffer_size) { + const rec = csv_reader.nextRecord() catch |err| switch (err) { + error.UnterminatedQuotedField => fatal( + "row {d}: unterminated quoted field", + stderr_writer, + .csv_error, + .{csv_row_count + 1}, + ), + else => fatal( + "row {d}: failed to parse CSV", + stderr_writer, + .csv_error, + .{csv_row_count + 1}, + ), + } orelse break; + csv_row_count += 1; + if (rec.len == 0) { + csv_reader.freeRecord(rec); + continue; + } + data_row_count += 1; + row_buffer.append(allocator, rec) catch + fatal("out of memory while buffering rows", stderr_writer, .csv_error, .{}); + } + + const types: []ColumnType = if (args.type_inference) blk: { + break :blk inferTypes(allocator, row_buffer.items, num_cols) catch + fatal("out of memory during type inference", stderr_writer, .csv_error, .{}); + } else blk: { + const t = allocator.alloc(ColumnType, num_cols) catch + fatal("out of memory", stderr_writer, .csv_error, .{}); + @memset(t, .TEXT); + break :blk t; + }; + defer allocator.free(types); + + // Stream remaining rows and count them + while (true) { + const record = csv_reader.nextRecord() catch |err| switch (err) { + error.UnterminatedQuotedField => fatal( + "row {d}: unterminated quoted field", + stderr_writer, + .csv_error, + .{csv_row_count + 1}, + ), + else => fatal( + "row {d}: failed to parse CSV", + stderr_writer, + .csv_error, + .{csv_row_count + 1}, + ), + } orelse break; + csv_row_count += 1; + defer csv_reader.freeRecord(record); + if (record.len == 0) continue; + data_row_count += 1; + } + + var count_buf: [32]u8 = undefined; + const count_str = fmtThousands(&count_buf, data_row_count); + + stdout_writer.print("OK: {s} rows, {d} columns (", .{ count_str, num_cols }) catch |err| { + 
std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + + for (cols, types, 0..) |col, t, i| { + if (i > 0) { + stdout_writer.writeAll(", ") catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + } + stdout_writer.print("{s} {s}", .{ col, @tagName(t) }) catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + } + stdout_writer.writeAll(")\n") catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + }, + .json => { + var stdin_buf: [4096]u8 = undefined; + var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); + + var buf: std.ArrayList(u8) = .empty; + defer buf.deinit(allocator); + while (true) { + const byte = stdin_file_reader.interface.takeByte() catch |err| switch (err) { + error.EndOfStream => break, + error.ReadFailed => fatal("failed to read JSON input", stderr_writer, .csv_error, .{}), + }; + buf.append(allocator, byte) catch fatal("out of memory reading JSON", stderr_writer, .csv_error, .{}); + } + if (buf.items.len == 0) fatal("empty input", stderr_writer, .csv_error, .{}); + + var parsed = std.json.parseFromSlice(std.json.Value, allocator, buf.items, .{}) catch + fatal("failed to parse JSON input", stderr_writer, .csv_error, .{}); + defer parsed.deinit(); + + const array = switch (parsed.value) { + .array => |a| a, + else => fatal("JSON input must be an array of objects", stderr_writer, .csv_error, .{}), + }; + if (array.items.len == 0) fatal("empty JSON array: cannot determine column names", stderr_writer, .csv_error, .{}); + + const first_obj = switch (array.items[0]) { + .object => |o| o, + else => fatal("JSON array elements must be objects", stderr_writer, .csv_error, .{}), + }; + + var num_cols: usize = 0; + var ki = first_obj.iterator(); + while (ki.next()) |_| num_cols += 
1; + + var count_buf: [32]u8 = undefined; + const count_str = fmtThousands(&count_buf, array.items.len); + stdout_writer.print("OK: {s} rows, {d} columns (", .{ count_str, num_cols }) catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + ki = first_obj.iterator(); + var col_i: usize = 0; + while (ki.next()) |entry| : (col_i += 1) { + if (col_i > 0) stdout_writer.writeAll(", ") catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + stdout_writer.print("{s} TEXT", .{entry.key_ptr.*}) catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + } + stdout_writer.writeAll(")\n") catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + }, + .ndjson => { + var stdin_buf: [4096]u8 = undefined; + var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); + + var line_num: usize = 0; + var row_count: usize = 0; + var cols_owned: ?[][]u8 = null; + defer if (cols_owned) |cs| { + for (cs) |col| allocator.free(col); + allocator.free(cs); + }; + + while (true) { + line_num += 1; + const line = json_mod.readLine(allocator, &stdin_file_reader.interface) catch |err| switch (err) { + error.OutOfMemory => fatal("out of memory reading NDJSON", stderr_writer, .csv_error, .{}), + error.ReadFailed => fatal("line {d}: failed to read NDJSON", stderr_writer, .csv_error, .{line_num}), + } orelse break; + defer allocator.free(line); + + const trimmed = std.mem.trim(u8, line, " \t\r"); + if (trimmed.len == 0) { + line_num -= 1; + continue; + } + + var parsed_line = std.json.parseFromSlice(std.json.Value, allocator, trimmed, .{}) catch + fatal("line {d}: failed to parse NDJSON", stderr_writer, .csv_error, .{line_num}); + defer parsed_line.deinit(); + + const obj = switch (parsed_line.value) { + .object => |o| 
o, + else => fatal("line {d}: NDJSON element must be a JSON object", stderr_writer, .csv_error, .{line_num}), + }; + + if (cols_owned == null) { + var col_list: std.ArrayList([]u8) = .empty; + errdefer { + for (col_list.items) |col| allocator.free(col); + col_list.deinit(allocator); + } + var ki = obj.iterator(); + while (ki.next()) |entry| { + const owned_key = allocator.dupe(u8, entry.key_ptr.*) catch + fatal("out of memory building column list", stderr_writer, .csv_error, .{}); + col_list.append(allocator, owned_key) catch + fatal("out of memory building column list", stderr_writer, .csv_error, .{}); + } + if (col_list.items.len == 0) + fatal("line 1: first NDJSON object has no keys", stderr_writer, .csv_error, .{}); + cols_owned = col_list.toOwnedSlice(allocator) catch + fatal("out of memory", stderr_writer, .csv_error, .{}); + } + row_count += 1; + } + + if (cols_owned == null) fatal("empty NDJSON input", stderr_writer, .csv_error, .{}); + + const cols = cols_owned.?; + var count_buf: [32]u8 = undefined; + const count_str = fmtThousands(&count_buf, row_count); + stdout_writer.print("OK: {s} rows, {d} columns (", .{ count_str, cols.len }) catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + for (cols, 0..) 
|col, i| { + if (i > 0) stdout_writer.writeAll(", ") catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + stdout_writer.print("{s} TEXT", .{col}) catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + } + stdout_writer.writeAll(")\n") catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + }, + .xml => { + var stdin_buf: [4096]u8 = undefined; + var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); + + const summary = xml_mod.summarizeXml(allocator, &stdin_file_reader.interface, args.xml_root_input, args.xml_row_input, stderr_writer); + defer { + for (summary.col_names) |name| allocator.free(name); + allocator.free(summary.col_names); + } + + var count_buf: [32]u8 = undefined; + const count_str = fmtThousands(&count_buf, summary.row_count); + stdout_writer.print("OK: {s} rows, {d} columns (", .{ count_str, summary.col_names.len }) catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + for (summary.col_names, 0..) |name, i| { + if (i > 0) stdout_writer.writeAll(", ") catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + stdout_writer.print("{s} TEXT", .{name}) catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + } + stdout_writer.writeAll(")\n") catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + }, + } +} From e07fd9e7a5fbb8c367ab98f7955a6f2c5dabab99 Mon Sep 17 00:00:00 2001 From: "Victor M. 
Varela" Date: Fri, 8 May 2026 14:37:29 +0200 Subject: [PATCH 6/6] =?UTF-8?q?fix:=20address=20code=20review=20=E2=80=94?= =?UTF-8?q?=20consolidate=20ExitCode,=20remove=20dead=20code,=20guard=20XM?= =?UTF-8?q?L=20validation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/args.zig | 20 ++++++++++-- src/format.zig | 6 +++- src/json.zig | 50 +++++++++++++++--------------- src/loader.zig | 60 +++++++++++------------------------- src/main.zig | 13 +------- src/modes/columns.zig | 7 +---- src/modes/sample.zig | 7 +---- src/modes/validate.zig | 7 +---- src/sqlite.zig | 70 ++++++++++++++++++++---------------------- src/xml.zig | 70 ++++++++++++++++++++---------------------- 10 files changed, 136 insertions(+), 174 deletions(-) diff --git a/src/args.zig b/src/args.zig index c8e2c47..9238e60 100644 --- a/src/args.zig +++ b/src/args.zig @@ -6,6 +6,18 @@ const format = @import("format.zig"); const InputFormat = format.InputFormat; const OutputFormat = format.OutputFormat; +/// Structured exit codes for scripting. 
+/// 0 = success +/// 1 = usage error (missing query, bad flag) +/// 2 = CSV/parse error +/// 3 = SQL error +pub const ExitCode = enum(u8) { + success = 0, + usage = 1, + csv_error = 2, + sql_error = 3, +}; + pub const SqlPipeError = error{ MissingQuery, InvalidDelimiter, @@ -387,9 +399,11 @@ pub fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult { if (silent and verbose) return error.SilentVerboseConflict; - // --xml-root and --xml-row must be valid XML element names - if (!isValidXmlName(xml_root) or !isValidXmlName(xml_row)) - return error.InvalidXmlName; + // --xml-root and --xml-row must be valid XML element names (only validated in XML mode) + if (input_format == .xml or output_format == .xml) { + if (!isValidXmlName(xml_root) or !isValidXmlName(xml_row)) + return error.InvalidXmlName; + } // --columns mode: list headers and exit if (list_columns) diff --git a/src/format.zig b/src/format.zig index fdbed0a..448b1b8 100644 --- a/src/format.zig +++ b/src/format.zig @@ -104,6 +104,8 @@ pub const OutputWriter = struct { /// Allocated in begin(); freed in deinit(). col_names: []const [*:0]const u8, col_count: c_int, + /// True when col_names was heap-allocated in begin(); false when begin() was never called. + col_names_allocated: bool, /// Create a new OutputWriter. Call begin() before the first writeRow(). pub fn init(format: OutputFormat, opts: WriteOpts) OutputWriter { @@ -113,13 +115,14 @@ pub const OutputWriter = struct { .first_row = true, .col_names = &.{}, .col_count = 0, + .col_names_allocated = false, }; } /// Release any memory allocated during begin(). /// Safe to call even when begin() was never called. 
pub fn deinit(self: *OutputWriter, allocator: std.mem.Allocator) void { - if (self.col_names.len > 0) { + if (self.col_names_allocated) { allocator.free(self.col_names); } self.* = undefined; @@ -150,6 +153,7 @@ pub const OutputWriter = struct { names[@intCast(i)] = c.sqlite3_column_name(stmt, i); } self.col_names = names; + self.col_names_allocated = true; }, .csv, .tsv => { if (self.opts.header and col_count > 0) diff --git a/src/json.zig b/src/json.zig index 77e9311..0732ea2 100644 --- a/src/json.zig +++ b/src/json.zig @@ -29,9 +29,7 @@ const prepareInsertStmt = sqlite_helpers.prepareInsertStmt; const beginTransaction = sqlite_helpers.beginTransaction; const commitTransaction = sqlite_helpers.commitTransaction; const fatal = sqlite_helpers.fatal; -const exit_usage = sqlite_helpers.exit_usage; -const exit_parse = sqlite_helpers.exit_parse; -const exit_sql = sqlite_helpers.exit_sql; +const ExitCode = sqlite_helpers.ExitCode; const sqlite_static = sqlite_helpers.sqlite_static; // ─── Shared helpers ─────────────────────────────────── @@ -189,28 +187,28 @@ pub fn loadJsonArray( while (true) { const byte = reader.takeByte() catch |err| switch (err) { error.EndOfStream => break, - error.ReadFailed => fatal("failed to read JSON input", stderr_writer, exit_parse, .{}), + error.ReadFailed => fatal("failed to read JSON input", stderr_writer, .csv_error, .{}), }; - buf.append(allocator, byte) catch fatal("out of memory reading JSON input", stderr_writer, exit_parse, .{}); + buf.append(allocator, byte) catch fatal("out of memory reading JSON input", stderr_writer, .csv_error, .{}); } - if (buf.items.len == 0) fatal("empty input", stderr_writer, exit_parse, .{}); + if (buf.items.len == 0) fatal("empty input", stderr_writer, .csv_error, .{}); var parsed = std.json.parseFromSlice(std.json.Value, allocator, buf.items, .{}) catch - fatal("failed to parse JSON input", stderr_writer, exit_parse, .{}); + fatal("failed to parse JSON input", stderr_writer, .csv_error, .{}); defer 
parsed.deinit(); const array = switch (parsed.value) { .array => |a| a, - else => fatal("JSON input must be an array of objects", stderr_writer, exit_parse, .{}), + else => fatal("JSON input must be an array of objects", stderr_writer, .csv_error, .{}), }; - if (array.items.len == 0) fatal("empty JSON array: cannot determine column names", stderr_writer, exit_parse, .{}); + if (array.items.len == 0) fatal("empty JSON array: cannot determine column names", stderr_writer, .csv_error, .{}); // Extract column names from the first object's keys (insertion order) const first_obj = switch (array.items[0]) { .object => |o| o, - else => fatal("JSON array elements must be objects", stderr_writer, exit_parse, .{}), + else => fatal("JSON array elements must be objects", stderr_writer, .csv_error, .{}), }; var cols: std.ArrayList([]const u8) = .empty; @@ -218,9 +216,9 @@ pub fn loadJsonArray( var key_iter = first_obj.iterator(); while (key_iter.next()) |entry| { cols.append(allocator, entry.key_ptr.*) catch - fatal("out of memory building column list", stderr_writer, exit_parse, .{}); + fatal("out of memory building column list", stderr_writer, .csv_error, .{}); } - if (cols.items.len == 0) fatal("first JSON object has no keys", stderr_writer, exit_parse, .{}); + if (cols.items.len == 0) fatal("first JSON object has no keys", stderr_writer, .csv_error, .{}); // Create all-TEXT table (column names are owned by parsed arena — valid until parsed.deinit()) createAllTextTable(allocator, db, cols.items, stderr_writer); @@ -235,15 +233,15 @@ pub fn loadJsonArray( for (array.items) |item| { const obj = switch (item) { .object => |o| o, - else => fatal("JSON array element is not an object", stderr_writer, exit_parse, .{}), + else => fatal("JSON array element is not an object", stderr_writer, .csv_error, .{}), }; rows_inserted += 1; if (max_rows) |limit| { if (rows_inserted > limit) - fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, exit_usage, .{limit}); + fatal("input 
exceeds --max-rows limit ({d} rows)", stderr_writer, .usage, .{limit}); } insertRowFromJson(allocator, stmt, cols.items, obj) catch - fatal("{s}", stderr_writer, exit_sql, .{std.mem.span(c.sqlite3_errmsg(db))}); + fatal("{s}", stderr_writer, .sql_error, .{std.mem.span(c.sqlite3_errmsg(db))}); } commitTransaction(db, stderr_writer); @@ -283,8 +281,8 @@ pub fn loadNdjsonInput( while (true) { line_num += 1; const line = readLine(allocator, reader) catch |err| switch (err) { - error.OutOfMemory => fatal("out of memory reading NDJSON", stderr_writer, exit_parse, .{}), - error.ReadFailed => fatal("line {d}: failed to read NDJSON input", stderr_writer, exit_parse, .{line_num}), + error.OutOfMemory => fatal("out of memory reading NDJSON", stderr_writer, .csv_error, .{}), + error.ReadFailed => fatal("line {d}: failed to read NDJSON input", stderr_writer, .csv_error, .{line_num}), } orelse break; defer allocator.free(line); @@ -295,12 +293,12 @@ pub fn loadNdjsonInput( } var parsed_line = std.json.parseFromSlice(std.json.Value, allocator, trimmed, .{}) catch - fatal("line {d}: failed to parse NDJSON", stderr_writer, exit_parse, .{line_num}); + fatal("line {d}: failed to parse NDJSON", stderr_writer, .csv_error, .{line_num}); defer parsed_line.deinit(); const obj = switch (parsed_line.value) { .object => |o| o, - else => fatal("line {d}: NDJSON element must be a JSON object", stderr_writer, exit_parse, .{line_num}), + else => fatal("line {d}: NDJSON element must be a JSON object", stderr_writer, .csv_error, .{line_num}), }; if (cols_owned == null) { @@ -313,15 +311,15 @@ pub fn loadNdjsonInput( var ki = obj.iterator(); while (ki.next()) |entry| { const owned_key = allocator.dupe(u8, entry.key_ptr.*) catch - fatal("out of memory building column list", stderr_writer, exit_parse, .{}); + fatal("out of memory building column list", stderr_writer, .csv_error, .{}); col_list.append(allocator, owned_key) catch - fatal("out of memory building column list", stderr_writer, exit_parse, 
.{}); + fatal("out of memory building column list", stderr_writer, .csv_error, .{}); } if (col_list.items.len == 0) - fatal("line 1: first NDJSON object has no keys", stderr_writer, exit_parse, .{}); + fatal("line 1: first NDJSON object has no keys", stderr_writer, .csv_error, .{}); cols_owned = col_list.toOwnedSlice(allocator) catch - fatal("out of memory", stderr_writer, exit_parse, .{}); + fatal("out of memory", stderr_writer, .csv_error, .{}); const cols_const: []const []const u8 = @ptrCast(cols_owned.?); createAllTextTable(allocator, db, cols_const, stderr_writer); @@ -334,16 +332,16 @@ pub fn loadNdjsonInput( rows_inserted += 1; if (max_rows) |limit| { if (rows_inserted > limit) - fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, exit_usage, .{limit}); + fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, .usage, .{limit}); } const cols_const: []const []const u8 = @ptrCast(cols_owned.?); insertRowFromJson(allocator, insert_stmt.?, cols_const, obj) catch - fatal("line {d}: {s}", stderr_writer, exit_sql, .{ line_num, std.mem.span(c.sqlite3_errmsg(db)) }); + fatal("line {d}: {s}", stderr_writer, .sql_error, .{ line_num, std.mem.span(c.sqlite3_errmsg(db)) }); } if (cols_owned == null) - fatal("empty NDJSON input", stderr_writer, exit_parse, .{}); + fatal("empty NDJSON input", stderr_writer, .csv_error, .{}); if (in_transaction) commitTransaction(db, stderr_writer); return rows_inserted; diff --git a/src/loader.zig b/src/loader.zig index 77d0906..55e2b8f 100644 --- a/src/loader.zig +++ b/src/loader.zig @@ -18,19 +18,6 @@ pub const inference_buffer_size: usize = 100; /// Number of rows between progress indicator updates. 
pub const progress_interval: usize = 10_000; -/// stripQuotes(raw) → []const u8 -/// Pre: raw is a valid UTF-8 slice -/// Post: if raw = '"' ++ inner ++ '"' => result = inner -/// otherwise => result = raw -/// Note: RFC 4180 quoted-field unescaping is handled by csv.zig; this function -/// provides an explicit, single-location implementation for any residual -/// direct string handling that bypasses the CSV parser. -fn stripQuotes(raw: []const u8) []const u8 { - if (raw.len >= 2 and raw[0] == '"' and raw[raw.len - 1] == '"') - return raw[1 .. raw.len - 1]; - return raw; -} - /// isInteger(val) → bool /// Pre: val is a valid UTF-8 slice /// Post: result = val matches [+-]?[0-9]+ (non-empty, only digits after optional sign) @@ -189,28 +176,17 @@ pub fn parseHeader( return cols.toOwnedSlice(allocator); } -/// insertRowTyped(stmt, db, row, types, param_count) → void +/// insertRowTyped(stmt, row, types, param_count) → void /// Pre: stmt is a prepared INSERT with param_count parameters, freshly reset /// row is a non-empty CSV record (slice of field slices) /// types.len = param_count (or shorter → remaining treated as TEXT) -/// db is the database that owns stmt (used for error reporting by caller) /// Post: each field is bound to its parameter using the appropriate SQLite bind -/// function according to types[j]: -/// INTEGER → sqlite3_bind_int64 (fallback: TEXT on parse failure) -/// REAL → sqlite3_bind_double (fallback: TEXT on parse failure) -/// TEXT → sqlite3_bind_text -/// empty / missing values → sqlite3_bind_null -/// sqlite3_step returned SQLITE_DONE -/// error.BindFailed / error.StepFailed on SQLite errors pub fn insertRowTyped( stmt: *c.sqlite3_stmt, - db: *c.sqlite3, row: [][]u8, types: []const ColumnType, param_count: c_int, ) args_mod.SqlPipeError!void { - _ = db; - _ = c.sqlite3_reset(stmt); _ = c.sqlite3_clear_bindings(stmt); @@ -332,15 +308,15 @@ pub fn loadCsvInput( var csv_reader = csv_mod.csvReaderWithDelimiter(allocator, 
&stdin_file_reader.interface, parsed.delimiter); const header_record = csv_reader.nextRecord() catch |err| switch (err) { - error.UnterminatedQuotedField => fatal("row 1: unterminated quoted field", stderr_writer, sqlite_mod.exit_parse, .{}), - else => fatal("row 1: failed to parse CSV header", stderr_writer, sqlite_mod.exit_parse, .{}), - } orelse fatal("empty input (no header row)", stderr_writer, sqlite_mod.exit_parse, .{}); + error.UnterminatedQuotedField => fatal("row 1: unterminated quoted field", stderr_writer, .csv_error, .{}), + else => fatal("row 1: failed to parse CSV header", stderr_writer, .csv_error, .{}), + } orelse fatal("empty input (no header row)", stderr_writer, .csv_error, .{}); defer csv_reader.freeRecord(header_record); const cols = parseHeader(allocator, header_record, stderr_writer) catch |err| switch (err) { - error.EmptyColumnName => fatal("row 1: empty column name in header", stderr_writer, sqlite_mod.exit_parse, .{}), - error.NoColumns => fatal("row 1: no columns found in header", stderr_writer, sqlite_mod.exit_parse, .{}), - else => fatal("row 1: failed to parse header", stderr_writer, sqlite_mod.exit_parse, .{}), + error.EmptyColumnName => fatal("row 1: empty column name in header", stderr_writer, .csv_error, .{}), + error.NoColumns => fatal("row 1: no columns found in header", stderr_writer, .csv_error, .{}), + else => fatal("row 1: failed to parse header", stderr_writer, .csv_error, .{}), }; defer { for (cols) |col| allocator.free(col); @@ -363,13 +339,13 @@ pub fn loadCsvInput( error.UnterminatedQuotedField => fatal( "row {d}: unterminated quoted field", stderr_writer, - sqlite_mod.exit_parse, + .csv_error, .{csv_row_count + 1}, ), else => fatal( "row {d}: failed to parse CSV", stderr_writer, - sqlite_mod.exit_parse, + .csv_error, .{csv_row_count + 1}, ), } orelse break; @@ -379,13 +355,13 @@ pub fn loadCsvInput( continue; } row_buffer.append(allocator, rec) catch - fatal("out of memory while buffering rows", stderr_writer, 
sqlite_mod.exit_parse, .{}); + fatal("out of memory while buffering rows", stderr_writer, .csv_error, .{}); } break :blk inferTypes(allocator, row_buffer.items, num_cols) catch - fatal("out of memory during type inference", stderr_writer, sqlite_mod.exit_parse, .{}); + fatal("out of memory during type inference", stderr_writer, .csv_error, .{}); } else blk: { const t = allocator.alloc(ColumnType, num_cols) catch - fatal("out of memory", stderr_writer, sqlite_mod.exit_parse, .{}); + fatal("out of memory", stderr_writer, .csv_error, .{}); @memset(t, .TEXT); break :blk t; }; @@ -414,9 +390,9 @@ pub fn loadCsvInput( rows_inserted += 1; if (parsed.max_rows) |limit| { if (rows_inserted > limit) - fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, sqlite_mod.exit_usage, .{limit}); + fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, .usage, .{limit}); } - insertRowTyped(stmt, db, row, types, @intCast(num_cols)) catch + insertRowTyped(stmt, row, types, @intCast(num_cols)) catch fatalSqlWithContext(allocator, db, std.mem.span(c.sqlite3_errmsg(db)), stderr_writer); if (is_tty and rows_inserted % progress_interval == 0) printProgress(stderr_writer, rows_inserted, parsed.max_rows); @@ -428,13 +404,13 @@ pub fn loadCsvInput( error.UnterminatedQuotedField => fatal( "row {d}: unterminated quoted field", stderr_writer, - sqlite_mod.exit_parse, + .csv_error, .{csv_row_count + 1}, ), else => fatal( "row {d}: failed to parse CSV", stderr_writer, - sqlite_mod.exit_parse, + .csv_error, .{csv_row_count + 1}, ), } orelse break; @@ -446,9 +422,9 @@ pub fn loadCsvInput( rows_inserted += 1; if (parsed.max_rows) |limit| { if (rows_inserted > limit) - fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, sqlite_mod.exit_usage, .{limit}); + fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, .usage, .{limit}); } - insertRowTyped(stmt, db, record, types, @intCast(num_cols)) catch + insertRowTyped(stmt, record, types, @intCast(num_cols)) 
catch fatalSqlWithContext(allocator, db, std.mem.span(c.sqlite3_errmsg(db)), stderr_writer); if (is_tty and rows_inserted % progress_interval == 0) printProgress(stderr_writer, rows_inserted, parsed.max_rows); diff --git a/src/main.zig b/src/main.zig index 2bfd761..1c13623 100644 --- a/src/main.zig +++ b/src/main.zig @@ -16,6 +16,7 @@ const VERSION: []const u8 = build_options.version; const SqlPipeError = args_mod.SqlPipeError; const ParsedArgs = args_mod.ParsedArgs; +const ExitCode = args_mod.ExitCode; const parseArgs = args_mod.parseArgs; const printUsage = args_mod.printUsage; @@ -23,18 +24,6 @@ const loadCsvInput = loader.loadCsvInput; const fmtThousands = loader.fmtThousands; const progress_interval = loader.progress_interval; -/// Structured exit codes for scripting. -/// 0 = success -/// 1 = usage error (missing query, bad flag) -/// 2 = CSV parse error -/// 3 = SQL error (sqlite3 error) -const ExitCode = enum(u8) { - success = 0, - usage = 1, - csv_error = 2, - sql_error = 3, -}; - /// Supported input formats (canonical definition lives in format.zig). 
const InputFormat = format.InputFormat; diff --git a/src/modes/columns.zig b/src/modes/columns.zig index 2d3e199..88825f4 100644 --- a/src/modes/columns.zig +++ b/src/modes/columns.zig @@ -9,12 +9,7 @@ const inferTypes = loader.inferTypes; const parseHeader = loader.parseHeader; const inference_buffer_size = loader.inference_buffer_size; -const ExitCode = enum(u8) { - success = 0, - usage = 1, - csv_error = 2, - sql_error = 3, -}; +const ExitCode = args_mod.ExitCode; fn fatal(comptime fmt: []const u8, writer: *std.Io.Writer, code: ExitCode, f_args: anytype) noreturn { writer.print("error: " ++ fmt ++ "\n", f_args) catch |err| { diff --git a/src/modes/sample.zig b/src/modes/sample.zig index 084a270..61b46f0 100644 --- a/src/modes/sample.zig +++ b/src/modes/sample.zig @@ -10,12 +10,7 @@ const inferTypes = loader.inferTypes; const parseHeader = loader.parseHeader; const inference_buffer_size = loader.inference_buffer_size; -const ExitCode = enum(u8) { - success = 0, - usage = 1, - csv_error = 2, - sql_error = 3, -}; +const ExitCode = args_mod.ExitCode; fn fatal(comptime fmt: []const u8, writer: *std.Io.Writer, code: ExitCode, f_args: anytype) noreturn { writer.print("error: " ++ fmt ++ "\n", f_args) catch |err| { diff --git a/src/modes/validate.zig b/src/modes/validate.zig index 6926328..627346f 100644 --- a/src/modes/validate.zig +++ b/src/modes/validate.zig @@ -12,12 +12,7 @@ const parseHeader = loader.parseHeader; const fmtThousands = loader.fmtThousands; const inference_buffer_size = loader.inference_buffer_size; -const ExitCode = enum(u8) { - success = 0, - usage = 1, - csv_error = 2, - sql_error = 3, -}; +const ExitCode = args_mod.ExitCode; fn fatal(comptime fmt: []const u8, writer: *std.Io.Writer, code: ExitCode, f_args: anytype) noreturn { writer.print("error: " ++ fmt ++ "\n", f_args) catch |err| { diff --git a/src/sqlite.zig b/src/sqlite.zig index e1ef096..ddf320b 100644 --- a/src/sqlite.zig +++ b/src/sqlite.zig @@ -2,6 +2,9 @@ const std = @import("std"); 
const c = @import("c"); +const args_mod = @import("args.zig"); + +pub const ExitCode = args_mod.ExitCode; /// SQLITE_STATIC: caller manages string lifetime; SQLite must not free it. pub const sqlite_static: c.sqlite3_destructor_type = null; @@ -9,18 +12,13 @@ pub const sqlite_static: c.sqlite3_destructor_type = null; /// Inferred SQLite affinity for a CSV column. pub const ColumnType = enum { TEXT, INTEGER, REAL }; -// Shared exit codes (same values as in each format module) -pub const exit_usage: u8 = 1; -pub const exit_parse: u8 = 2; -pub const exit_sql: u8 = 3; - /// fatal(fmt, writer, code, args) → noreturn /// /// Writes an error message to writer and exits with the given code. -pub fn fatal(comptime fmt: []const u8, writer: *std.Io.Writer, code: u8, args: anytype) noreturn { +pub fn fatal(comptime fmt: []const u8, writer: *std.Io.Writer, code: ExitCode, args: anytype) noreturn { writer.print("error: " ++ fmt ++ "\n", args) catch |err| std.log.err("failed to write error: {}", .{err}); writer.flush() catch |err| std.log.err("failed to flush: {}", .{err}); - std.process.exit(code); + std.process.exit(@intFromEnum(code)); } /// Create table `t` with all-TEXT columns. Column names are double-quote–escaped @@ -34,24 +32,24 @@ pub fn createAllTextTable( var sql: std.ArrayList(u8) = .empty; defer sql.deinit(allocator); - sql.appendSlice(allocator, "CREATE TABLE t (") catch fatal("out of memory", writer, exit_parse, .{}); + sql.appendSlice(allocator, "CREATE TABLE t (") catch fatal("out of memory", writer, .csv_error, .{}); for (cols, 0..) 
|col, i| { - if (i > 0) sql.appendSlice(allocator, ", ") catch fatal("out of memory", writer, exit_parse, .{}); - sql.append(allocator, '"') catch fatal("out of memory", writer, exit_parse, .{}); + if (i > 0) sql.appendSlice(allocator, ", ") catch fatal("out of memory", writer, .csv_error, .{}); + sql.append(allocator, '"') catch fatal("out of memory", writer, .csv_error, .{}); for (col) |ch| { - if (ch == '"') sql.append(allocator, '"') catch fatal("out of memory", writer, exit_parse, .{}); - sql.append(allocator, ch) catch fatal("out of memory", writer, exit_parse, .{}); + if (ch == '"') sql.append(allocator, '"') catch fatal("out of memory", writer, .csv_error, .{}); + sql.append(allocator, ch) catch fatal("out of memory", writer, .csv_error, .{}); } - sql.appendSlice(allocator, "\" TEXT") catch fatal("out of memory", writer, exit_parse, .{}); + sql.appendSlice(allocator, "\" TEXT") catch fatal("out of memory", writer, .csv_error, .{}); } - sql.appendSlice(allocator, ")") catch fatal("out of memory", writer, exit_parse, .{}); - sql.append(allocator, 0) catch fatal("out of memory", writer, exit_parse, .{}); + sql.appendSlice(allocator, ")") catch fatal("out of memory", writer, .csv_error, .{}); + sql.append(allocator, 0) catch fatal("out of memory", writer, .csv_error, .{}); var errmsg: [*c]u8 = null; if (c.sqlite3_exec(db, sql.items.ptr, null, null, &errmsg) != c.SQLITE_OK) { const msg = if (errmsg != null) std.mem.span(errmsg) else std.mem.span(c.sqlite3_errmsg(db)); if (errmsg != null) c.sqlite3_free(errmsg); - fatal("{s}", writer, exit_sql, .{msg}); + fatal("{s}", writer, .sql_error, .{msg}); } } @@ -65,17 +63,17 @@ pub fn prepareInsertStmt( var sql: std.ArrayList(u8) = .empty; defer sql.deinit(allocator); - sql.appendSlice(allocator, "INSERT INTO t VALUES (") catch fatal("out of memory", writer, exit_parse, .{}); + sql.appendSlice(allocator, "INSERT INTO t VALUES (") catch fatal("out of memory", writer, .csv_error, .{}); for (0..n) |i| { - if (i > 0) 
sql.append(allocator, ',') catch fatal("out of memory", writer, exit_parse, .{}); - sql.append(allocator, '?') catch fatal("out of memory", writer, exit_parse, .{}); + if (i > 0) sql.append(allocator, ',') catch fatal("out of memory", writer, .csv_error, .{}); + sql.append(allocator, '?') catch fatal("out of memory", writer, .csv_error, .{}); } - sql.appendSlice(allocator, ")") catch fatal("out of memory", writer, exit_parse, .{}); - sql.append(allocator, 0) catch fatal("out of memory", writer, exit_parse, .{}); + sql.appendSlice(allocator, ")") catch fatal("out of memory", writer, .csv_error, .{}); + sql.append(allocator, 0) catch fatal("out of memory", writer, .csv_error, .{}); var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(db, sql.items.ptr, -1, &stmt, null) != c.SQLITE_OK) - fatal("{s}", writer, exit_sql, .{std.mem.span(c.sqlite3_errmsg(db))}); + fatal("{s}", writer, .sql_error, .{std.mem.span(c.sqlite3_errmsg(db))}); return stmt.?; } @@ -84,7 +82,7 @@ pub fn beginTransaction(db: *c.sqlite3, writer: *std.Io.Writer) void { if (c.sqlite3_exec(db, "BEGIN TRANSACTION", null, null, &errmsg) != c.SQLITE_OK) { const msg = if (errmsg != null) std.mem.span(errmsg) else std.mem.span(c.sqlite3_errmsg(db)); if (errmsg != null) c.sqlite3_free(errmsg); - fatal("{s}", writer, exit_sql, .{msg}); + fatal("{s}", writer, .sql_error, .{msg}); } } @@ -93,7 +91,7 @@ pub fn commitTransaction(db: *c.sqlite3, writer: *std.Io.Writer) void { if (c.sqlite3_exec(db, "COMMIT", null, null, &errmsg) != c.SQLITE_OK) { const msg = if (errmsg != null) std.mem.span(errmsg) else std.mem.span(c.sqlite3_errmsg(db)); if (errmsg != null) c.sqlite3_free(errmsg); - fatal("{s}", writer, exit_sql, .{msg}); + fatal("{s}", writer, .sql_error, .{msg}); } } @@ -103,7 +101,7 @@ pub fn commitTransaction(db: *c.sqlite3, writer: *std.Io.Writer) void { pub fn openDb(writer: *std.Io.Writer) *c.sqlite3 { var db: ?*c.sqlite3 = null; if (c.sqlite3_open(":memory:", &db) != c.SQLITE_OK) - fatal("failed to 
open in-memory database", writer, exit_sql, .{}); + fatal("failed to open in-memory database", writer, .sql_error, .{}); return db.?; } @@ -126,29 +124,29 @@ pub fn createTable( var sql: std.ArrayList(u8) = .empty; defer sql.deinit(allocator); - sql.appendSlice(allocator, "CREATE TABLE t (") catch fatal("out of memory", writer, exit_parse, .{}); + sql.appendSlice(allocator, "CREATE TABLE t (") catch fatal("out of memory", writer, .csv_error, .{}); for (cols, 0..) |col, i| { - if (i > 0) sql.appendSlice(allocator, ", ") catch fatal("out of memory", writer, exit_parse, .{}); - sql.append(allocator, '"') catch fatal("out of memory", writer, exit_parse, .{}); + if (i > 0) sql.appendSlice(allocator, ", ") catch fatal("out of memory", writer, .csv_error, .{}); + sql.append(allocator, '"') catch fatal("out of memory", writer, .csv_error, .{}); for (col) |ch| { - if (ch == '"') sql.append(allocator, '"') catch fatal("out of memory", writer, exit_parse, .{}); - sql.append(allocator, ch) catch fatal("out of memory", writer, exit_parse, .{}); + if (ch == '"') sql.append(allocator, '"') catch fatal("out of memory", writer, .csv_error, .{}); + sql.append(allocator, ch) catch fatal("out of memory", writer, .csv_error, .{}); } - sql.append(allocator, '"') catch fatal("out of memory", writer, exit_parse, .{}); + sql.append(allocator, '"') catch fatal("out of memory", writer, .csv_error, .{}); sql.appendSlice(allocator, switch (types[i]) { .INTEGER => " INTEGER", .REAL => " REAL", .TEXT => " TEXT", - }) catch fatal("out of memory", writer, exit_parse, .{}); + }) catch fatal("out of memory", writer, .csv_error, .{}); } - sql.appendSlice(allocator, ")") catch fatal("out of memory", writer, exit_parse, .{}); - sql.append(allocator, 0) catch fatal("out of memory", writer, exit_parse, .{}); + sql.appendSlice(allocator, ")") catch fatal("out of memory", writer, .csv_error, .{}); + sql.append(allocator, 0) catch fatal("out of memory", writer, .csv_error, .{}); var errmsg: [*c]u8 = null; 
if (c.sqlite3_exec(db, sql.items.ptr, null, null, &errmsg) != c.SQLITE_OK) { const msg = if (errmsg != null) std.mem.span(errmsg) else std.mem.span(c.sqlite3_errmsg(db)); if (errmsg != null) c.sqlite3_free(errmsg); - fatal("{s}", writer, exit_sql, .{msg}); + fatal("{s}", writer, .sql_error, .{msg}); } } @@ -260,5 +258,5 @@ pub fn fatalSqlWithContext( }; printSqlErrorContext(allocator, db, errmsg, writer); writer.flush() catch |err| std.log.err("failed to flush: {}", .{err}); - std.process.exit(exit_sql); + std.process.exit(@intFromEnum(ExitCode.sql_error)); } diff --git a/src/xml.zig b/src/xml.zig index 95e99ad..525b5f6 100644 --- a/src/xml.zig +++ b/src/xml.zig @@ -37,9 +37,7 @@ const prepareInsertStmt = sqlite_helpers.prepareInsertStmt; const beginTransaction = sqlite_helpers.beginTransaction; const commitTransaction = sqlite_helpers.commitTransaction; const fatal = sqlite_helpers.fatal; -const exit_usage = sqlite_helpers.exit_usage; -const exit_parse = sqlite_helpers.exit_parse; -const exit_sql = sqlite_helpers.exit_sql; +const ExitCode = sqlite_helpers.ExitCode; const sqlite_static = sqlite_helpers.sqlite_static; // ─── XML escaping ───────────────────────────────────── @@ -279,7 +277,7 @@ pub const XmlParser = struct { err_writer.print("error: xml: line {d}, col {d}: ", .{ self.line, self.col }) catch |err| std.log.err("failed to write error: {}", .{err}); err_writer.print(fmt ++ "\n", args) catch |err| std.log.err("failed to write error: {}", .{err}); err_writer.flush() catch |err| std.log.err("failed to flush: {}", .{err}); - std.process.exit(exit_parse); + std.process.exit(@intFromEnum(ExitCode.csv_error)); } // ─── Skip helpers ──────────────────────────────────── @@ -701,11 +699,11 @@ pub fn getXmlColumnNames( while (true) { const byte = reader.takeByte() catch |err| switch (err) { error.EndOfStream => break, - error.ReadFailed => fatal("failed to read XML input", stderr_writer, exit_parse, .{}), + error.ReadFailed => fatal("failed to read XML input", 
stderr_writer, .csv_error, .{}), }; - buf.append(allocator, byte) catch fatal("out of memory reading XML", stderr_writer, exit_parse, .{}); + buf.append(allocator, byte) catch fatal("out of memory reading XML", stderr_writer, .csv_error, .{}); } - if (buf.items.len == 0) fatal("empty input", stderr_writer, exit_parse, .{}); + if (buf.items.len == 0) fatal("empty input", stderr_writer, .csv_error, .{}); var p = XmlParser.init(buf.items); p.skipPrologue(stderr_writer); @@ -715,12 +713,12 @@ pub fn getXmlColumnNames( } else p.readRootOpen(stderr_writer); const cols = p.nextRow(allocator, root_name, xml_row, stderr_writer) catch - fatal("out of memory parsing XML", stderr_writer, exit_parse, .{}); + fatal("out of memory parsing XML", stderr_writer, .csv_error, .{}); if (cols == null) { if (xml_row) |row_tag| - fatal("XML document has no '{s}' elements (check --xml-row value)", stderr_writer, exit_parse, .{row_tag}) + fatal("XML document has no '{s}' elements (check --xml-row value)", stderr_writer, .csv_error, .{row_tag}) else - fatal("XML document has no row elements", stderr_writer, exit_parse, .{}); + fatal("XML document has no row elements", stderr_writer, .csv_error, .{}); } defer { for (cols.?) |col| if (col.value) |v| allocator.free(v); @@ -730,10 +728,10 @@ pub fn getXmlColumnNames( var names: std.ArrayList([]const u8) = .empty; for (cols.?) |col| { const owned = allocator.dupe(u8, col.name) catch - fatal("out of memory", stderr_writer, exit_parse, .{}); - names.append(allocator, owned) catch fatal("out of memory", stderr_writer, exit_parse, .{}); + fatal("out of memory", stderr_writer, .csv_error, .{}); + names.append(allocator, owned) catch fatal("out of memory", stderr_writer, .csv_error, .{}); } - return names.toOwnedSlice(allocator) catch fatal("out of memory", stderr_writer, exit_parse, .{}); + return names.toOwnedSlice(allocator) catch fatal("out of memory", stderr_writer, .csv_error, .{}); } /// XmlSummary — result of summarizeXml. 
@@ -765,11 +763,11 @@ pub fn summarizeXml( while (true) { const byte = reader.takeByte() catch |err| switch (err) { error.EndOfStream => break, - error.ReadFailed => fatal("failed to read XML input", stderr_writer, exit_parse, .{}), + error.ReadFailed => fatal("failed to read XML input", stderr_writer, .csv_error, .{}), }; - buf.append(allocator, byte) catch fatal("out of memory reading XML", stderr_writer, exit_parse, .{}); + buf.append(allocator, byte) catch fatal("out of memory reading XML", stderr_writer, .csv_error, .{}); } - if (buf.items.len == 0) fatal("empty input", stderr_writer, exit_parse, .{}); + if (buf.items.len == 0) fatal("empty input", stderr_writer, .csv_error, .{}); var p = XmlParser.init(buf.items); p.skipPrologue(stderr_writer); @@ -785,7 +783,7 @@ pub fn summarizeXml( // Bounding function: rows remaining in the XML document (finite) while (true) { const cols = p.nextRow(allocator, root_name, xml_row, stderr_writer) catch - fatal("out of memory parsing XML", stderr_writer, exit_parse, .{}); + fatal("out of memory parsing XML", stderr_writer, .csv_error, .{}); if (cols == null) break; defer { for (cols.?) |col| if (col.value) |v| allocator.free(v); @@ -796,19 +794,19 @@ pub fn summarizeXml( var names: std.ArrayList([]const u8) = .empty; for (cols.?) 
|col| { const owned = allocator.dupe(u8, col.name) catch - fatal("out of memory", stderr_writer, exit_parse, .{}); - names.append(allocator, owned) catch fatal("out of memory", stderr_writer, exit_parse, .{}); + fatal("out of memory", stderr_writer, .csv_error, .{}); + names.append(allocator, owned) catch fatal("out of memory", stderr_writer, .csv_error, .{}); } col_names = names.toOwnedSlice(allocator) catch - fatal("out of memory", stderr_writer, exit_parse, .{}); + fatal("out of memory", stderr_writer, .csv_error, .{}); } } if (col_names == null) { if (xml_row) |row_tag| - fatal("XML document has no '{s}' elements (check --xml-row value)", stderr_writer, exit_parse, .{row_tag}) + fatal("XML document has no '{s}' elements (check --xml-row value)", stderr_writer, .csv_error, .{row_tag}) else - fatal("XML document has no row elements", stderr_writer, exit_parse, .{}); + fatal("XML document has no row elements", stderr_writer, .csv_error, .{}); } return .{ .row_count = row_count, .col_names = col_names.? 
}; } @@ -838,11 +836,11 @@ pub fn loadXmlInput( while (true) { const byte = reader.takeByte() catch |err| switch (err) { error.EndOfStream => break, - error.ReadFailed => fatal("failed to read XML input", stderr_writer, exit_parse, .{}), + error.ReadFailed => fatal("failed to read XML input", stderr_writer, .csv_error, .{}), }; - buf.append(allocator, byte) catch fatal("out of memory reading XML input", stderr_writer, exit_parse, .{}); + buf.append(allocator, byte) catch fatal("out of memory reading XML input", stderr_writer, .csv_error, .{}); } - if (buf.items.len == 0) fatal("empty input", stderr_writer, exit_parse, .{}); + if (buf.items.len == 0) fatal("empty input", stderr_writer, .csv_error, .{}); var p = XmlParser.init(buf.items); p.skipPrologue(stderr_writer); @@ -872,7 +870,7 @@ pub fn loadXmlInput( // Bounding function: row elements remaining in the document (finite) while (true) { const cols = p.nextRow(allocator, root_name, xml_row, stderr_writer) catch - fatal("out of memory parsing XML", stderr_writer, exit_parse, .{}); + fatal("out of memory parsing XML", stderr_writer, .csv_error, .{}); if (cols == null) break; defer { @@ -883,7 +881,7 @@ pub fn loadXmlInput( rows_inserted += 1; if (max_rows) |limit| { if (rows_inserted > limit) - fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, exit_usage, .{limit}); + fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, .usage, .{limit}); } if (col_names == null) { @@ -891,12 +889,12 @@ pub fn loadXmlInput( var names: std.ArrayList([]const u8) = .empty; for (cols.?) 
|col| { const owned = allocator.dupe(u8, col.name) catch - fatal("out of memory", stderr_writer, exit_parse, .{}); - names.append(allocator, owned) catch fatal("out of memory", stderr_writer, exit_parse, .{}); + fatal("out of memory", stderr_writer, .csv_error, .{}); + names.append(allocator, owned) catch fatal("out of memory", stderr_writer, .csv_error, .{}); } if (names.items.len == 0) - fatal("first XML row element has no column children", stderr_writer, exit_parse, .{}); - col_names = names.toOwnedSlice(allocator) catch fatal("out of memory", stderr_writer, exit_parse, .{}); + fatal("first XML row element has no column children", stderr_writer, .csv_error, .{}); + col_names = names.toOwnedSlice(allocator) catch fatal("out of memory", stderr_writer, .csv_error, .{}); createAllTextTable(allocator, db, col_names.?, stderr_writer); beginTransaction(db, stderr_writer); @@ -922,22 +920,22 @@ pub fn loadXmlInput( }; if (value) |v| { if (c.sqlite3_bind_text(stmt, param_idx, v.ptr, @intCast(v.len), sqlite_static) != c.SQLITE_OK) - fatal("{s}", stderr_writer, exit_sql, .{std.mem.span(c.sqlite3_errmsg(db))}); + fatal("{s}", stderr_writer, .sql_error, .{std.mem.span(c.sqlite3_errmsg(db))}); } else { if (c.sqlite3_bind_null(stmt, param_idx) != c.SQLITE_OK) - fatal("{s}", stderr_writer, exit_sql, .{std.mem.span(c.sqlite3_errmsg(db))}); + fatal("{s}", stderr_writer, .sql_error, .{std.mem.span(c.sqlite3_errmsg(db))}); } } if (c.sqlite3_step(stmt) != c.SQLITE_DONE) - fatal("{s}", stderr_writer, exit_sql, .{std.mem.span(c.sqlite3_errmsg(db))}); + fatal("{s}", stderr_writer, .sql_error, .{std.mem.span(c.sqlite3_errmsg(db))}); } if (col_names == null) { if (xml_row) |row_tag| - fatal("XML document has no '{s}' elements (check --xml-row value)", stderr_writer, exit_parse, .{row_tag}) + fatal("XML document has no '{s}' elements (check --xml-row value)", stderr_writer, .csv_error, .{row_tag}) else - fatal("XML document has no row elements", stderr_writer, exit_parse, .{}); + 
fatal("XML document has no row elements", stderr_writer, .csv_error, .{}); } if (in_transaction) commitTransaction(db, stderr_writer); return rows_inserted;