From 58873989e806943d11bde544f4560f5286df2ac1 Mon Sep 17 00:00:00 2001
From: "Victor M. Varela" <vmvarela@gmail.com>
Date: Thu, 7 May 2026 12:52:58 +0200
Subject: [PATCH 1/3] feat: add --validate mode to check CSV syntax without
 running a query (#88)

---
 README.md           |   1 +
 build.zig           |  38 +++++++++
 docs/sql-pipe.1.scd |   8 ++
 src/main.zig        | 184 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 231 insertions(+)
diff --git a/README.md b/README.md
index 2a992b8..8ff4f05 100644
--- a/README.md
+++ b/README.md
@@ -211,6 +211,7 @@ $ cat events.csv \
 | `-H`, `--header` | Print column names as the first output row |
 | `--json` | Alias for `--output-format json` (mutually exclusive with `-H`) |
 | `--max-rows <n>` | Stop if more than `n` data rows are read (exit 1) |
+| `--validate` | Parse the entire CSV input and print a summary (`OK: <n> rows, <m> columns (...)`) to stdout. Exit 0 on success, exit 2 on CSV error. No query required. Compatible with `--delimiter`, `--tsv`, `--no-type-inference`. |
 | `--columns` | Read the CSV header row, print each column name on its own line, and exit 0. With `-v`/`--verbose`, also shows the inferred type per column (`name INTEGER`). Respects `--delimiter` and `--tsv`. Mutually exclusive with a query argument. |
 | `--output <file>` | Write results to the given file instead of stdout. Creates or overwrites the file. Exits 1 if the file cannot be created. |
 | `-v`, `--verbose` | Print `Loaded <n> rows in <t>s` to stderr after loading (always on TTY; forced with flag) |
diff --git a/build.zig b/build.zig
index e24c32c..3df9858 100644
--- a/build.zig
+++ b/build.zig
@@ -780,6 +780,44 @@ pub fn build(b: *std.Build) void {
     test_silent_v_conflict.step.dependOn(b.getInstallStep());
     test_step.dependOn(&test_silent_v_conflict.step);
 
+    // Integration test 75: --validate on valid CSV prints OK summary and exits 0
+    const test_validate_ok = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'id,name,amount\n1,Alice,3.14\n2,Bob,2.72\n' | ./zig-out/bin/sql-pipe --validate)
+        \\expected='OK: 2 rows, 3 columns (id INTEGER, name TEXT, amount REAL)'
+        \\[ "$result" = "$expected" ]
+    });
+    test_validate_ok.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_validate_ok.step);
+
+    // Integration test 76: --validate on malformed CSV exits 2
+    const test_validate_error = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\msg=$(printf 'id,name\n"unterminated' | ./zig-out/bin/sql-pipe --validate 2>&1 >/dev/null; echo "EXIT:$?")
+        \\echo "$msg" | grep -q 'row 2: unterminated quoted field' && echo "$msg" | grep -q 'EXIT:2'
+    });
+    test_validate_error.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_validate_error.step);
+
+    // Integration test 77: --validate with custom delimiter
+    const test_validate_delimiter = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'id|name|amount\n1|Alice|3.14\n' | ./zig-out/bin/sql-pipe --validate --delimiter '|')
+        \\expected='OK: 1 rows, 3 columns (id INTEGER, name TEXT, amount REAL)'
+        \\[ "$result" = "$expected" ]
+    });
+    test_validate_delimiter.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_validate_delimiter.step);
+
+    // Integration test 78: --validate with query argument exits 1
+    const test_validate_with_query = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\msg=$(printf 'a,b\n1,2\n' | ./zig-out/bin/sql-pipe --validate 'SELECT * FROM t' 2>&1 >/dev/null; echo "EXIT:$?")
+        \\echo "$msg" | grep -q 'error: --validate cannot be combined with a query argument' && echo "$msg" | grep -q 'EXIT:1'
+    });
+    test_validate_with_query.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_validate_with_query.step);
+
     // Unit tests for the RFC 4180 CSV parser (src/csv.zig)
     const unit_tests = b.addTest(.{
         .root_module = b.createModule(.{
diff --git a/docs/sql-pipe.1.scd b/docs/sql-pipe.1.scd
index 0ef350f..19358b5 100644
--- a/docs/sql-pipe.1.scd
+++ b/docs/sql-pipe.1.scd
@@ -70,6 +70,14 @@ OPTIONS
 		stderr is a TTY. Useful for producing clean stderr in interactive
 		terminals. Cannot be combined with *-v* / *--verbose*.
 
+	*--validate*
+		Parse the entire CSV input without executing a SQL query. On
+		success, prints a one-line summary to standard output:
+		*OK: <n> rows, <m> columns (<col> <TYPE>, ...)* and exits 0.
+		On CSV parse error, prints the error message and exits 2.
+		Compatible with *--delimiter*, *--tsv*, and
+		*--no-type-inference*. Mutually exclusive with a query argument.
+
 	*--columns*
 		Read the CSV header row, print each column name on its own line to
 		standard output, and exit with code 0. When combined with *-v* /
diff --git a/src/main.zig b/src/main.zig
index 0f48f84..3f215f1 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -20,6 +20,7 @@ const SqlPipeError = error{
     IncompatibleFlags,
     SilentVerboseConflict,
     ColumnsWithQuery,
+    ValidateWithQuery,
     InvalidMaxRows,
     InvalidInputFormat,
     InvalidOutputFormat,
@@ -101,6 +102,16 @@ const ColumnsArgs = struct {
     input_format: InputFormat,
 };
 
+/// Arguments for `--validate` mode, as produced by parseArgs.
+const ValidateArgs = struct {
+    /// CSV field delimiter (default: ','); runValidate ignores it for .tsv and uses '\t'.
+    delimiter: u8,
+    /// When true, infer column types from the buffered leading rows; when false, report TEXT.
+    type_inference: bool,
+    /// Input format (default: csv).
+    input_format: InputFormat,
+};
+
 /// Result of argument parsing — either parsed arguments or a special action.
 const ArgsResult = union(enum) {
     /// Normal execution: run the query.
@@ -111,6 +122,8 @@ const ArgsResult = union(enum) {
     version,
     /// User requested --columns: list column names and exit.
     columns: ColumnsArgs,
+    /// User requested --validate: parse CSV and print summary.
+    validate: ValidateArgs,
 };
 
 // ─── Extracted functions ──────────────────────────────
@@ -138,6 +151,10 @@ fn printUsage(writer: *std.Io.Writer) !void {
         \\                               With --columns: show inferred type per column
         \\  -s, --silent                 Suppress row count output unconditionally
         \\                               Cannot be combined with -v/--verbose
+        \\  --validate                   Parse the entire CSV input and print a summary to stdout
+        \\                               (OK: <n> rows, <m> columns (<col> <TYPE>, ...))
+        \\                               Exit 0 on success, exit 2 on CSV error. No query required.
+        \\                               Compatible with --delimiter, --tsv, --no-type-inference.
         \\  --columns                    List column names from input header (one per line) and exit
         \\                               Combine with -v/--verbose to include inferred types
         \\                               Cannot be combined with --output or a query argument
@@ -218,6 +235,7 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult {
     var verbose = false;
     var silent = false;
     var list_columns = false;
+    var validate = false;
     var output: ?[]const u8 = null;
 
     // Loop invariant I: all args[1..i] have been processed;
@@ -282,6 +300,8 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult {
             silent = true;
         } else if (std.mem.eql(u8, arg, "--columns")) {
             list_columns = true;
+        } else if (std.mem.eql(u8, arg, "--validate")) {
+            validate = true;
         } else if (std.mem.eql(u8, arg, "--output")) {
             i += 1;
             if (i >= args.len) return error.InvalidOutputPath;
@@ -309,6 +329,10 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult {
     if (list_columns and query != null)
         return error.ColumnsWithQuery;
 
+    // --validate is mutually exclusive with a query argument
+    if (validate and query != null)
+        return error.ValidateWithQuery;
+
     // --silent and --verbose are mutually exclusive
     if (silent and verbose)
         return error.SilentVerboseConflict;
@@ -321,6 +345,14 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult {
             .input_format = input_format,
         } };
 
+    // --validate mode: parse CSV and print summary
+    if (validate)
+        return .{ .validate = ValidateArgs{
+            .delimiter = delimiter,
+            .type_inference = type_inference,
+            .input_format = input_format,
+        } };
+
     return .{ .parsed = ParsedArgs{
         .query = query orelse return error.MissingQuery,
         .type_inference = type_inference,
@@ -1323,6 +1355,142 @@ fn runColumns(
     }
 }
 
+/// runValidate(args, allocator, io, stderr_writer, stdout_writer) → void
+/// Pre:  args is valid; allocator and writers are valid
+/// Post: on return, stdin was read until the parser reported no more records and the
+///       summary "OK: <n> rows, <m> columns (<col> <TYPE>, ...)" was written to
+///       stdout_writer (not flushed here). Parse failures never return: see fatal().
+fn runValidate(
+    args: ValidateArgs,
+    allocator: std.mem.Allocator,
+    io: std.Io,
+    stderr_writer: *std.Io.Writer,
+    stdout_writer: *std.Io.Writer,
+) void {
+    switch (args.input_format) {
+        .csv, .tsv => {
+            const col_delim: u8 = if (args.input_format == .tsv) '\t' else args.delimiter; // .tsv forces tab
+            var stdin_buf: [4096]u8 = undefined;
+            var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf);
+            var csv_reader = csv.csvReaderWithDelimiter(allocator, &stdin_file_reader.interface, col_delim);
+
+            const header_record = csv_reader.nextRecord() catch |err| switch (err) {
+                error.UnterminatedQuotedField => fatal("row 1: unterminated quoted field", stderr_writer, .csv_error, .{}),
+                else => fatal("row 1: failed to parse CSV header", stderr_writer, .csv_error, .{}),
+            } orelse fatal("empty input (no header row)", stderr_writer, .csv_error, .{});
+            defer csv_reader.freeRecord(header_record);
+
+            const cols = parseHeader(allocator, header_record, stderr_writer) catch |err| switch (err) {
+                error.EmptyColumnName => fatal("row 1: empty column name in header", stderr_writer, .csv_error, .{}),
+                error.NoColumns => fatal("row 1: no columns found in header", stderr_writer, .csv_error, .{}),
+                else => fatal("row 1: failed to parse header", stderr_writer, .csv_error, .{}),
+            };
+            defer {
+                for (cols) |col| allocator.free(col);
+                allocator.free(cols);
+            }
+
+            const num_cols = cols.len;
+            var csv_row_count: usize = 1; // physical record number for error messages; header already read
+            var data_row_count: usize = 0;
+
+            var row_buffer: std.ArrayList([][]u8) = .empty;
+            defer {
+                for (row_buffer.items) |row| csv_reader.freeRecord(row);
+                row_buffer.deinit(allocator);
+            }
+
+            // Buffer up to inference_buffer_size non-empty rows; only these feed type inference
+            while (row_buffer.items.len < inference_buffer_size) {
+                const rec = csv_reader.nextRecord() catch |err| switch (err) {
+                    error.UnterminatedQuotedField => fatal(
+                        "row {d}: unterminated quoted field",
+                        stderr_writer,
+                        .csv_error,
+                        .{csv_row_count + 1},
+                    ),
+                    else => fatal(
+                        "row {d}: failed to parse CSV",
+                        stderr_writer,
+                        .csv_error,
+                        .{csv_row_count + 1},
+                    ),
+                } orelse break;
+                csv_row_count += 1;
+                if (rec.len == 0) { // zero-field record: counted in csv_row_count only, not buffered
+                    csv_reader.freeRecord(rec);
+                    continue;
+                }
+                data_row_count += 1;
+                row_buffer.append(allocator, rec) catch
+                    fatal("out of memory while buffering rows", stderr_writer, .csv_error, .{});
+            }
+
+            const types: []ColumnType = if (args.type_inference) blk: {
+                break :blk inferTypes(allocator, row_buffer.items, num_cols) catch
+                    fatal("out of memory during type inference", stderr_writer, .csv_error, .{});
+            } else blk: {
+                const t = allocator.alloc(ColumnType, num_cols) catch
+                    fatal("out of memory", stderr_writer, .csv_error, .{});
+                @memset(t, .TEXT); // type inference disabled: every column is reported as TEXT
+                break :blk t;
+            };
+            defer allocator.free(types);
+
+            // Stream remaining rows and count them (field count is not compared to num_cols here)
+            while (true) {
+                const record = csv_reader.nextRecord() catch |err| switch (err) {
+                    error.UnterminatedQuotedField => fatal(
+                        "row {d}: unterminated quoted field",
+                        stderr_writer,
+                        .csv_error,
+                        .{csv_row_count + 1},
+                    ),
+                    else => fatal(
+                        "row {d}: failed to parse CSV",
+                        stderr_writer,
+                        .csv_error,
+                        .{csv_row_count + 1},
+                    ),
+                } orelse break;
+                csv_row_count += 1;
+                defer csv_reader.freeRecord(record);
+                if (record.len == 0) continue; // zero-field record: not a data row
+                data_row_count += 1;
+            }
+
+            var count_buf: [32]u8 = undefined;
+            const count_str = fmtThousands(&count_buf, data_row_count);
+
+            stdout_writer.print("OK: {s} rows, {d} columns (", .{ count_str, num_cols }) catch |err| {
+                std.log.err("failed to write output: {}", .{err});
+                std.process.exit(@intFromEnum(ExitCode.usage)); // NOTE(review): write failure uses the usage exit code — confirm intended
+            };
+
+            for (cols, types, 0..) |col, t, i| {
+                if (i > 0) {
+                    stdout_writer.writeAll(", ") catch |err| {
+                        std.log.err("failed to write output: {}", .{err});
+                        std.process.exit(@intFromEnum(ExitCode.usage));
+                    };
+                }
+                stdout_writer.print("{s} {s}", .{ col, @tagName(t) }) catch |err| {
+                    std.log.err("failed to write output: {}", .{err});
+                    std.process.exit(@intFromEnum(ExitCode.usage));
+                };
+            }
+            stdout_writer.writeAll(")\n") catch |err| {
+                std.log.err("failed to write output: {}", .{err});
+                std.process.exit(@intFromEnum(ExitCode.usage));
+            };
+        },
+        .json, .ndjson => {
+            // --validate is only meaningful for CSV/TSV input
+            fatal("--validate is only supported for CSV and TSV input", stderr_writer, .usage, .{});
+        },
+    }
+}
+
 /// run(parsed, allocator, io, stderr_writer, stdout_writer) → void
 /// Pre:  parsed contains a valid query; allocator and writers are valid
 /// Post: input from stdin has been loaded (dispatched on parsed.input_format),
@@ -1454,6 +1622,13 @@ pub fn main(init: std.process.Init.Minimal) void {
                 stderr_writer.flush() catch |ferr| std.log.err("failed to flush: {}", .{ferr});
                 std.process.exit(@intFromEnum(ExitCode.usage));
             },
+            error.ValidateWithQuery => {
+                stderr_writer.writeAll("error: --validate cannot be combined with a query argument\n") catch |werr| {
+                    std.log.err("failed to write error message: {}", .{werr});
+                };
+                stderr_writer.flush() catch |ferr| std.log.err("failed to flush: {}", .{ferr});
+                std.process.exit(@intFromEnum(ExitCode.usage));
+            },
             error.InvalidOutputPath => {
                 stderr_writer.writeAll("error: --output requires a non-empty file path\n") catch |werr| {
                     std.log.err("failed to write error message: {}", .{werr});
@@ -1501,6 +1676,15 @@ pub fn main(init: std.process.Init.Minimal) void {
                 std.log.err("failed to flush stderr: {}", .{err});
             };
         },
+        .validate => |val_args| {
+            runValidate(val_args, allocator, io.io(), stderr_writer, stdout_writer);
+            stdout_file_writer.flush() catch |err| {
+                std.log.err("failed to flush stdout: {}", .{err});
+            };
+            stderr_file_writer.flush() catch |err| {
+                std.log.err("failed to flush stderr: {}", .{err});
+            };
+        },
         .parsed => |parsed| {
             if (parsed.output) |output_path| {
                 const output_file = std.Io.Dir.createFile(std.Io.Dir.cwd(), io.io(), output_path, .{}) catch |err| {

From baa67d9f6fb9d09c4ffc506520546a5f923c07d1 Mon Sep 17 00:00:00 2001
From: "Victor M. Varela" <vmvarela@gmail.com>
Date: Thu, 7 May 2026 12:56:16 +0200
Subject: [PATCH 2/3] feat: extend --validate to support JSON and NDJSON input
 formats

---
 README.md           |   2 +-
 build.zig           |  31 ++++++++++
 docs/sql-pipe.1.scd |  12 ++--
 src/main.zig        | 145 ++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 178 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 8ff4f05..b0226e4 100644
--- a/README.md
+++ b/README.md
@@ -211,7 +211,7 @@ $ cat events.csv \
 | `-H`, `--header` | Print column names as the first output row |
 | `--json` | Alias for `--output-format json` (mutually exclusive with `-H`) |
 | `--max-rows <n>` | Stop if more than `n` data rows are read (exit 1) |
-| `--validate` | Parse the entire CSV input and print a summary (`OK: <n> rows, <m> columns (...)`) to stdout. Exit 0 on success, exit 2 on CSV error. No query required. Compatible with `--delimiter`, `--tsv`, `--no-type-inference`. |
+| `--validate` | Parse the entire input and print a summary (`OK: <n> rows, <m> columns (col TYPE, ...)`) to stdout. Exit 0 on success, exit 2 on parse error. No query required. Compatible with `--delimiter`, `--tsv`, `--no-type-inference`, `-I`/`--input-format` (csv, tsv, json, ndjson). JSON/NDJSON columns are reported as TEXT. |
 | `--columns` | Read the CSV header row, print each column name on its own line, and exit 0. With `-v`/`--verbose`, also shows the inferred type per column (`name INTEGER`). Respects `--delimiter` and `--tsv`. Mutually exclusive with a query argument. |
 | `--output <file>` | Write results to the given file instead of stdout. Creates or overwrites the file. Exits 1 if the file cannot be created. |
 | `-v`, `--verbose` | Print `Loaded <n> rows in <t>s` to stderr after loading (always on TTY; forced with flag) |
diff --git a/build.zig b/build.zig
index 3df9858..070ab25 100644
--- a/build.zig
+++ b/build.zig
@@ -818,6 +818,37 @@ pub fn build(b: *std.Build) void {
     test_validate_with_query.step.dependOn(b.getInstallStep());
     test_step.dependOn(&test_validate_with_query.step);
 
+    // Integration test 79: --validate on valid JSON array
+    const test_validate_json = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf '[{"id":1,"name":"Alice"},{"id":2,"name":"Bob"}]' \
+        \\  | ./zig-out/bin/sql-pipe --validate -I json)
+        \\expected='OK: 2 rows, 2 columns (id TEXT, name TEXT)'
+        \\[ "$result" = "$expected" ]
+    });
+    test_validate_json.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_validate_json.step);
+
+    // Integration test 80: --validate on valid NDJSON
+    const test_validate_ndjson = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf '{"id":1,"name":"Alice"}\n{"id":2,"name":"Bob"}\n' \
+        \\  | ./zig-out/bin/sql-pipe --validate -I ndjson)
+        \\expected='OK: 2 rows, 2 columns (id TEXT, name TEXT)'
+        \\[ "$result" = "$expected" ]
+    });
+    test_validate_ndjson.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_validate_ndjson.step);
+
+    // Integration test 81: --validate on invalid JSON exits 2
+    const test_validate_json_error = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\msg=$(printf '[{"id":1, broken}]' | ./zig-out/bin/sql-pipe --validate -I json 2>&1 >/dev/null; echo "EXIT:$?")
+        \\echo "$msg" | grep -q 'EXIT:2'
+    });
+    test_validate_json_error.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_validate_json_error.step);
+
     // Unit tests for the RFC 4180 CSV parser (src/csv.zig)
     const unit_tests = b.addTest(.{
         .root_module = b.createModule(.{
diff --git a/docs/sql-pipe.1.scd b/docs/sql-pipe.1.scd
index 19358b5..7af41b4 100644
--- a/docs/sql-pipe.1.scd
+++ b/docs/sql-pipe.1.scd
@@ -71,12 +71,14 @@ OPTIONS
 		terminals. Cannot be combined with *-v* / *--verbose*.
 
 	*--validate*
-		Parse the entire CSV input without executing a SQL query. On
-		success, prints a one-line summary to standard output:
+		Parse the entire input without executing a SQL query. On success,
+		prints a one-line summary to standard output:
 		*OK: <n> rows, <m> columns (<col> <TYPE>, ...)* and exits 0.
-		On CSV parse error, prints the error message and exits 2.
-		Compatible with *--delimiter*, *--tsv*, and
-		*--no-type-inference*. Mutually exclusive with a query argument.
+		On parse error, prints the error message and exits 2. Compatible
+		with *--delimiter*, *--tsv*, *--no-type-inference*, and
+		*-I* / *--input-format* (csv, tsv, json, ndjson). JSON and NDJSON
+		columns are reported as TEXT. Mutually exclusive with a query
+		argument.
 
 	*--columns*
 		Read the CSV header row, print each column name on its own line to
diff --git a/src/main.zig b/src/main.zig
index 3f215f1..367f483 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -151,10 +151,10 @@ fn printUsage(writer: *std.Io.Writer) !void {
         \\                               With --columns: show inferred type per column
         \\  -s, --silent                 Suppress row count output unconditionally
         \\                               Cannot be combined with -v/--verbose
-        \\  --validate                   Parse the entire CSV input and print a summary to stdout
+        \\  --validate                   Parse the entire input and print a summary to stdout
         \\                               (OK: <n> rows, <m> columns (<col> <TYPE>, ...))
-        \\                               Exit 0 on success, exit 2 on CSV error. No query required.
-        \\                               Compatible with --delimiter, --tsv, --no-type-inference.
+        \\                               Exit 0 on success, exit 2 on parse error. No query required.
+        \\                               Compatible with --delimiter, --tsv, --no-type-inference, -I.
         \\  --columns                    List column names from input header (one per line) and exit
         \\                               Combine with -v/--verbose to include inferred types
         \\                               Cannot be combined with --output or a query argument
@@ -1484,9 +1484,142 @@ fn runValidate(
                 std.process.exit(@intFromEnum(ExitCode.usage));
             };
         },
-        .json, .ndjson => {
-            // --validate is only meaningful for CSV/TSV input
-            fatal("--validate is only supported for CSV and TSV input", stderr_writer, .usage, .{});
+        .json => {
+            var stdin_buf: [4096]u8 = undefined;
+            var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf);
+
+            var buf: std.ArrayList(u8) = .empty;
+            defer buf.deinit(allocator);
+            while (true) {
+                const byte = stdin_file_reader.interface.takeByte() catch |err| switch (err) {
+                    error.EndOfStream => break,
+                    error.ReadFailed => fatal("failed to read JSON input", stderr_writer, .csv_error, .{}),
+                };
+                buf.append(allocator, byte) catch fatal("out of memory reading JSON", stderr_writer, .csv_error, .{});
+            }
+            if (buf.items.len == 0) fatal("empty input", stderr_writer, .csv_error, .{});
+
+            var parsed = std.json.parseFromSlice(std.json.Value, allocator, buf.items, .{}) catch
+                fatal("failed to parse JSON input", stderr_writer, .csv_error, .{});
+            defer parsed.deinit();
+
+            const array = switch (parsed.value) {
+                .array => |a| a,
+                else => fatal("JSON input must be an array of objects", stderr_writer, .csv_error, .{}),
+            };
+            if (array.items.len == 0) fatal("empty JSON array: cannot determine column names", stderr_writer, .csv_error, .{});
+
+            const first_obj = switch (array.items[0]) {
+                .object => |o| o,
+                else => fatal("JSON array elements must be objects", stderr_writer, .csv_error, .{}),
+            };
+
+            var num_cols: usize = 0;
+            var ki = first_obj.iterator();
+            while (ki.next()) |_| num_cols += 1;
+
+            var count_buf: [32]u8 = undefined;
+            const count_str = fmtThousands(&count_buf, array.items.len);
+            stdout_writer.print("OK: {s} rows, {d} columns (", .{ count_str, num_cols }) catch |err| {
+                std.log.err("failed to write output: {}", .{err});
+                std.process.exit(@intFromEnum(ExitCode.usage));
+            };
+            ki = first_obj.iterator();
+            var col_i: usize = 0;
+            while (ki.next()) |entry| : (col_i += 1) {
+                if (col_i > 0) stdout_writer.writeAll(", ") catch |err| {
+                    std.log.err("failed to write output: {}", .{err});
+                    std.process.exit(@intFromEnum(ExitCode.usage));
+                };
+                stdout_writer.print("{s} TEXT", .{entry.key_ptr.*}) catch |err| {
+                    std.log.err("failed to write output: {}", .{err});
+                    std.process.exit(@intFromEnum(ExitCode.usage));
+                };
+            }
+            stdout_writer.writeAll(")\n") catch |err| {
+                std.log.err("failed to write output: {}", .{err});
+                std.process.exit(@intFromEnum(ExitCode.usage));
+            };
+        },
+        .ndjson => {
+            var stdin_buf: [4096]u8 = undefined;
+            var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf);
+
+            var line_num: usize = 0;
+            var row_count: usize = 0;
+            var cols_owned: ?[][]u8 = null;
+            defer if (cols_owned) |cs| {
+                for (cs) |col| allocator.free(col);
+                allocator.free(cs);
+            };
+
+            while (true) {
+                line_num += 1;
+                const line = json.readLine(allocator, &stdin_file_reader.interface) catch |err| switch (err) {
+                    error.OutOfMemory => fatal("out of memory reading NDJSON", stderr_writer, .csv_error, .{}),
+                    error.ReadFailed => fatal("line {d}: failed to read NDJSON", stderr_writer, .csv_error, .{line_num}),
+                } orelse break;
+                defer allocator.free(line);
+
+                const trimmed = std.mem.trim(u8, line, " \t\r");
+                if (trimmed.len == 0) {
+                    line_num -= 1;
+                    continue;
+                }
+
+                var parsed_line = std.json.parseFromSlice(std.json.Value, allocator, trimmed, .{}) catch
+                    fatal("line {d}: failed to parse NDJSON", stderr_writer, .csv_error, .{line_num});
+                defer parsed_line.deinit();
+
+                const obj = switch (parsed_line.value) {
+                    .object => |o| o,
+                    else => fatal("line {d}: NDJSON element must be a JSON object", stderr_writer, .csv_error, .{line_num}),
+                };
+
+                if (cols_owned == null) {
+                    var col_list: std.ArrayList([]u8) = .empty;
+                    errdefer {
+                        for (col_list.items) |col| allocator.free(col);
+                        col_list.deinit(allocator);
+                    }
+                    var ki = obj.iterator();
+                    while (ki.next()) |entry| {
+                        const owned_key = allocator.dupe(u8, entry.key_ptr.*) catch
+                            fatal("out of memory building column list", stderr_writer, .csv_error, .{});
+                        col_list.append(allocator, owned_key) catch
+                            fatal("out of memory building column list", stderr_writer, .csv_error, .{});
+                    }
+                    if (col_list.items.len == 0)
+                        fatal("line 1: first NDJSON object has no keys", stderr_writer, .csv_error, .{});
+                    cols_owned = col_list.toOwnedSlice(allocator) catch
+                        fatal("out of memory", stderr_writer, .csv_error, .{});
+                }
+                row_count += 1;
+            }
+
+            if (cols_owned == null) fatal("empty NDJSON input", stderr_writer, .csv_error, .{});
+
+            const cols = cols_owned.?;
+            var count_buf: [32]u8 = undefined;
+            const count_str = fmtThousands(&count_buf, row_count);
+            stdout_writer.print("OK: {s} rows, {d} columns (", .{ count_str, cols.len }) catch |err| {
+                std.log.err("failed to write output: {}", .{err});
+                std.process.exit(@intFromEnum(ExitCode.usage));
+            };
+            for (cols, 0..) |col, i| {
+                if (i > 0) stdout_writer.writeAll(", ") catch |err| {
+                    std.log.err("failed to write output: {}", .{err});
+                    std.process.exit(@intFromEnum(ExitCode.usage));
+                };
+                stdout_writer.print("{s} TEXT", .{col}) catch |err| {
+                    std.log.err("failed to write output: {}", .{err});
+                    std.process.exit(@intFromEnum(ExitCode.usage));
+                };
+            }
+            stdout_writer.writeAll(")\n") catch |err| {
+                std.log.err("failed to write output: {}", .{err});
+                std.process.exit(@intFromEnum(ExitCode.usage));
+            };
         },
     }
 }

From 6c1efa1aff6805fd75d10c83c1d59eb0f7fac220 Mon Sep 17 00:00:00 2001
From: "Victor M. Varela" <vmvarela@gmail.com>
Date: Thu, 7 May 2026 13:04:42 +0200
Subject: [PATCH 3/3] fix: enforce --validate mutual exclusion with --output
 and --columns

---
 build.zig    | 27 +++++++++++++++++++++++++++
 src/main.zig | 32 ++++++++++++++++++++++++++++----
 2 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/build.zig b/build.zig
index 070ab25..15b5541 100644
--- a/build.zig
+++ b/build.zig
@@ -849,6 +849,33 @@ pub fn build(b: *std.Build) void {
     test_validate_json_error.step.dependOn(b.getInstallStep());
     test_step.dependOn(&test_validate_json_error.step);
 
+    // Integration test 82: --validate --output exits 1 with error
+    const test_validate_output_conflict = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\msg=$(printf 'a,b\n1,2\n' | ./zig-out/bin/sql-pipe --validate --output /tmp/x 2>&1 >/dev/null; echo "EXIT:$?")
+        \\echo "$msg" | grep -q 'error: --output cannot be combined with --validate' && echo "$msg" | grep -q 'EXIT:1'
+    });
+    test_validate_output_conflict.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_validate_output_conflict.step);
+
+    // Integration test 83: --validate --columns exits 1 with error
+    const test_validate_columns_conflict = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\msg=$(printf 'a,b\n1,2\n' | ./zig-out/bin/sql-pipe --validate --columns 2>&1 >/dev/null; echo "EXIT:$?")
+        \\echo "$msg" | grep -q 'error: --validate cannot be combined with --columns' && echo "$msg" | grep -q 'EXIT:1'
+    });
+    test_validate_columns_conflict.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_validate_columns_conflict.step);
+
+    // Integration test 84: --validate on invalid NDJSON exits 2
+    const test_validate_ndjson_error = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\msg=$(printf '{"id":1}\n{broken}\n' | ./zig-out/bin/sql-pipe --validate -I ndjson 2>&1 >/dev/null; echo "EXIT:$?")
+        \\echo "$msg" | grep -q 'EXIT:2'
+    });
+    test_validate_ndjson_error.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_validate_ndjson_error.step);
+
     // Unit tests for the RFC 4180 CSV parser (src/csv.zig)
     const unit_tests = b.addTest(.{
         .root_module = b.createModule(.{
diff --git a/src/main.zig b/src/main.zig
index 367f483..51a3017 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -21,6 +21,8 @@ const SqlPipeError = error{
     SilentVerboseConflict,
     ColumnsWithQuery,
     ValidateWithQuery,
+    ValidateWithColumns,
+    OutputWithValidate,
     InvalidMaxRows,
     InvalidInputFormat,
     InvalidOutputFormat,
@@ -122,7 +124,7 @@ const ArgsResult = union(enum) {
     version,
     /// User requested --columns: list column names and exit.
     columns: ColumnsArgs,
-    /// User requested --validate: parse CSV and print summary.
+    /// User requested --validate: parse input and print summary.
     validate: ValidateArgs,
 };
 
@@ -325,6 +327,14 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult {
     if (output != null and list_columns)
         return error.OutputWithColumns;
 
+    // --output is mutually exclusive with --validate (--validate always writes to stdout)
+    if (output != null and validate)
+        return error.OutputWithValidate;
+
+    // --validate is mutually exclusive with --columns
+    if (validate and list_columns)
+        return error.ValidateWithColumns;
+
     // --columns is mutually exclusive with a query argument
     if (list_columns and query != null)
         return error.ColumnsWithQuery;
@@ -1357,9 +1367,9 @@ fn runColumns(
 
 /// runValidate(args, allocator, io, stderr_writer, stdout_writer) → void
 /// Pre:  args is valid; allocator and writers are valid
-/// Post: the entire CSV/TSV input has been parsed; on success prints
-///       "OK: <n> rows, <m> columns (<col> <TYPE>, ...)" to stdout and exits 0.
-///       On CSV parse error, prints the error message to stderr and exits 2.
+/// Post: the entire input has been parsed (CSV, TSV, JSON, or NDJSON);
+///       on success prints "OK: <n> rows, <m> columns (<col> <TYPE>, ...)" to stdout.
+///       On parse error, prints the error message to stderr and exits 2.
 fn runValidate(
     args: ValidateArgs,
     allocator: std.mem.Allocator,
@@ -1776,6 +1786,20 @@ pub fn main(init: std.process.Init.Minimal) void {
                 stderr_writer.flush() catch |ferr| std.log.err("failed to flush: {}", .{ferr});
                 std.process.exit(@intFromEnum(ExitCode.usage));
             },
+            error.OutputWithValidate => {
+                stderr_writer.writeAll("error: --output cannot be combined with --validate\n") catch |werr| {
+                    std.log.err("failed to write error message: {}", .{werr});
+                };
+                stderr_writer.flush() catch |ferr| std.log.err("failed to flush: {}", .{ferr});
+                std.process.exit(@intFromEnum(ExitCode.usage));
+            },
+            error.ValidateWithColumns => {
+                stderr_writer.writeAll("error: --validate cannot be combined with --columns\n") catch |werr| {
+                    std.log.err("failed to write error message: {}", .{werr});
+                };
+                stderr_writer.flush() catch |ferr| std.log.err("failed to flush: {}", .{ferr});
+                std.process.exit(@intFromEnum(ExitCode.usage));
+            },
             else => {},
         }
         printUsage(stderr_writer) catch |werr| {