From 58873989e806943d11bde544f4560f5286df2ac1 Mon Sep 17 00:00:00 2001 From: "Victor M. Varela" Date: Thu, 7 May 2026 12:52:58 +0200 Subject: [PATCH 1/3] feat: add --validate mode to check CSV syntax without running a query (#88) --- README.md | 1 + build.zig | 38 +++++++++ docs/sql-pipe.1.scd | 8 ++ src/main.zig | 184 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 231 insertions(+) diff --git a/README.md b/README.md index 2a992b8..8ff4f05 100644 --- a/README.md +++ b/README.md @@ -211,6 +211,7 @@ $ cat events.csv \ | `-H`, `--header` | Print column names as the first output row | | `--json` | Alias for `--output-format json` (mutually exclusive with `-H`) | | `--max-rows ` | Stop if more than `n` data rows are read (exit 1) | +| `--validate` | Parse the entire CSV input and print a summary (`OK: rows, columns (...)`) to stdout. Exit 0 on success, exit 2 on CSV error. No query required. Compatible with `--delimiter`, `--tsv`, `--no-type-inference`. | | `--columns` | Read the CSV header row, print each column name on its own line, and exit 0. With `-v`/`--verbose`, also shows the inferred type per column (`name INTEGER`). Respects `--delimiter` and `--tsv`. Mutually exclusive with a query argument. | | `--output ` | Write results to the given file instead of stdout. Creates or overwrites the file. Exits 1 if the file cannot be created. | | `-v`, `--verbose` | Print `Loaded rows in s` to stderr after loading (always on TTY; forced with flag) | diff --git a/build.zig b/build.zig index e24c32c..3df9858 100644 --- a/build.zig +++ b/build.zig @@ -780,6 +780,44 @@ pub fn build(b: *std.Build) void { test_silent_v_conflict.step.dependOn(b.getInstallStep()); test_step.dependOn(&test_silent_v_conflict.step); + // Integration test 75: --validate on valid CSV prints OK summary and exits 0 + const test_validate_ok = b.addSystemCommand(&.{ + "bash", "-c", + \\result=$(printf 'id,name,amount\n1,Alice,3.14\n2,Bob,2.72\n' | ./zig-out/bin/sql-pipe --validate) + \\expected='OK: 2 rows, 3 columns (id INTEGER, name TEXT, amount REAL)' + \\[ "$result" = "$expected" ] + }); + test_validate_ok.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_validate_ok.step); + + // Integration test 76: --validate on malformed CSV exits 2 + const test_validate_error = b.addSystemCommand(&.{ + "bash", "-c", + \\msg=$(printf 'id,name\n"unterminated' | ./zig-out/bin/sql-pipe --validate 2>&1 >/dev/null; echo "EXIT:$?") + \\echo "$msg" | grep -q 'row 2: unterminated quoted field' && echo "$msg" | grep -q 'EXIT:2' + }); + test_validate_error.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_validate_error.step); + + // Integration test 77: --validate with custom delimiter + const test_validate_delimiter = b.addSystemCommand(&.{ + "bash", "-c", + \\result=$(printf 'id|name|amount\n1|Alice|3.14\n' | ./zig-out/bin/sql-pipe --validate --delimiter '|') + \\expected='OK: 1 rows, 3 columns (id INTEGER, name TEXT, amount REAL)' + \\[ "$result" = "$expected" ] + }); + test_validate_delimiter.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_validate_delimiter.step); + + // Integration test 78: --validate with query argument exits 1 + const test_validate_with_query = b.addSystemCommand(&.{ + "bash", "-c", + \\msg=$(printf 'a,b\n1,2\n' | ./zig-out/bin/sql-pipe --validate 'SELECT * FROM t' 2>&1 >/dev/null; echo "EXIT:$?") + \\echo "$msg" | grep -q 'error: --validate cannot be combined with a query argument' && echo "$msg" | grep -q 'EXIT:1' + }); + test_validate_with_query.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_validate_with_query.step); + // Unit tests for the RFC 4180 CSV parser (src/csv.zig) const unit_tests = b.addTest(.{ .root_module = b.createModule(.{ diff --git a/docs/sql-pipe.1.scd b/docs/sql-pipe.1.scd index 0ef350f..19358b5 100644 --- a/docs/sql-pipe.1.scd +++ b/docs/sql-pipe.1.scd @@ -70,6 +70,14 @@ OPTIONS stderr is a TTY. Useful for producing clean stderr in interactive terminals. Cannot be combined with *-v* / *--verbose*. + *--validate* + Parse the entire CSV input without executing a SQL query. On + success, prints a one-line summary to standard output: + *OK: rows, columns ( , ...)* and exits 0. + On CSV parse error, prints the error message and exits 2. + Compatible with *--delimiter*, *--tsv*, and + *--no-type-inference*. Mutually exclusive with a query argument. + *--columns* Read the CSV header row, print each column name on its own line to standard output, and exit with code 0. When combined with *-v* / diff --git a/src/main.zig b/src/main.zig index 0f48f84..3f215f1 100644 --- a/src/main.zig +++ b/src/main.zig @@ -20,6 +20,7 @@ const SqlPipeError = error{ IncompatibleFlags, SilentVerboseConflict, ColumnsWithQuery, + ValidateWithQuery, InvalidMaxRows, InvalidInputFormat, InvalidOutputFormat, @@ -101,6 +102,16 @@ const ColumnsArgs = struct { input_format: InputFormat, }; +/// Arguments for `--validate` mode. +const ValidateArgs = struct { + /// CSV field delimiter (default: ','). + delimiter: u8, + /// Infer column types from the first 100 buffered rows when true. + type_inference: bool, + /// Input format (default: csv). + input_format: InputFormat, +}; + /// Result of argument parsing — either parsed arguments or a special action. const ArgsResult = union(enum) { /// Normal execution: run the query. @@ -111,6 +122,8 @@ const ArgsResult = union(enum) { version, /// User requested --columns: list column names and exit. columns: ColumnsArgs, + /// User requested --validate: parse CSV and print summary. + validate: ValidateArgs, }; // ─── Extracted functions ────────────────────────────── @@ -138,6 +151,10 @@ fn printUsage(writer: *std.Io.Writer) !void { \\ With --columns: show inferred type per column \\ -s, --silent Suppress row count output unconditionally \\ Cannot be combined with -v/--verbose + \\ --validate Parse the entire CSV input and print a summary to stdout + \\ (OK: rows, columns ( , ...)) + \\ Exit 0 on success, exit 2 on CSV error. No query required. + \\ Compatible with --delimiter, --tsv, --no-type-inference. \\ --columns List column names from input header (one per line) and exit \\ Combine with -v/--verbose to include inferred types \\ Cannot be combined with --output or a query argument @@ -218,6 +235,7 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult { var verbose = false; var silent = false; var list_columns = false; + var validate = false; var output: ?[]const u8 = null; // Loop invariant I: all args[1..i] have been processed; @@ -282,6 +300,8 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult { silent = true; } else if (std.mem.eql(u8, arg, "--columns")) { list_columns = true; + } else if (std.mem.eql(u8, arg, "--validate")) { + validate = true; } else if (std.mem.eql(u8, arg, "--output")) { i += 1; if (i >= args.len) return error.InvalidOutputPath; @@ -309,6 +329,10 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult { if (list_columns and query != null) return error.ColumnsWithQuery; + // --validate is mutually exclusive with a query argument + if (validate and query != null) + return error.ValidateWithQuery; + // --silent and --verbose are mutually exclusive if (silent and verbose) return error.SilentVerboseConflict; @@ -321,6 +345,14 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult { .input_format = input_format, } }; + // --validate mode: parse CSV and print summary + if (validate) + return .{ .validate = ValidateArgs{ + .delimiter = delimiter, + .type_inference = type_inference, + .input_format = input_format, + } }; + return .{ .parsed = ParsedArgs{ .query = query orelse return error.MissingQuery, .type_inference = type_inference, @@ -1323,6 +1355,142 @@ fn runColumns( } } +/// runValidate(args, allocator, io, stderr_writer, stdout_writer) → void +/// Pre: args is valid; allocator and writers are valid +/// Post: the entire CSV/TSV input has been parsed; on success prints +/// "OK: rows, columns ( , ...)" to stdout and exits 0. +/// On CSV parse error, prints the error message to stderr and exits 2. +fn runValidate( + args: ValidateArgs, + allocator: std.mem.Allocator, + io: std.Io, + stderr_writer: *std.Io.Writer, + stdout_writer: *std.Io.Writer, +) void { + switch (args.input_format) { + .csv, .tsv => { + const col_delim: u8 = if (args.input_format == .tsv) '\t' else args.delimiter; + var stdin_buf: [4096]u8 = undefined; + var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); + var csv_reader = csv.csvReaderWithDelimiter(allocator, &stdin_file_reader.interface, col_delim); + + const header_record = csv_reader.nextRecord() catch |err| switch (err) { + error.UnterminatedQuotedField => fatal("row 1: unterminated quoted field", stderr_writer, .csv_error, .{}), + else => fatal("row 1: failed to parse CSV header", stderr_writer, .csv_error, .{}), + } orelse fatal("empty input (no header row)", stderr_writer, .csv_error, .{}); + defer csv_reader.freeRecord(header_record); + + const cols = parseHeader(allocator, header_record, stderr_writer) catch |err| switch (err) { + error.EmptyColumnName => fatal("row 1: empty column name in header", stderr_writer, .csv_error, .{}), + error.NoColumns => fatal("row 1: no columns found in header", stderr_writer, .csv_error, .{}), + else => fatal("row 1: failed to parse header", stderr_writer, .csv_error, .{}), + }; + defer { + for (cols) |col| allocator.free(col); + allocator.free(cols); + } + + const num_cols = cols.len; + var csv_row_count: usize = 1; // header already read + var data_row_count: usize = 0; + + var row_buffer: std.ArrayList([][]u8) = .empty; + defer { + for (row_buffer.items) |row| csv_reader.freeRecord(row); + row_buffer.deinit(allocator); + } + + // Buffer up to inference_buffer_size rows for type inference + while (row_buffer.items.len < inference_buffer_size) { + const rec = csv_reader.nextRecord() catch |err| switch (err) { + error.UnterminatedQuotedField => fatal( + "row {d}: unterminated quoted field", + stderr_writer, + .csv_error, + .{csv_row_count + 1}, + ), + else => fatal( + "row {d}: failed to parse CSV", + stderr_writer, + .csv_error, + .{csv_row_count + 1}, + ), + } orelse break; + csv_row_count += 1; + if (rec.len == 0) { + csv_reader.freeRecord(rec); + continue; + } + data_row_count += 1; + row_buffer.append(allocator, rec) catch + fatal("out of memory while buffering rows", stderr_writer, .csv_error, .{}); + } + + const types: []ColumnType = if (args.type_inference) blk: { + break :blk inferTypes(allocator, row_buffer.items, num_cols) catch + fatal("out of memory during type inference", stderr_writer, .csv_error, .{}); + } else blk: { + const t = allocator.alloc(ColumnType, num_cols) catch + fatal("out of memory", stderr_writer, .csv_error, .{}); + @memset(t, .TEXT); + break :blk t; + }; + defer allocator.free(types); + + // Stream remaining rows and count them + while (true) { + const record = csv_reader.nextRecord() catch |err| switch (err) { + error.UnterminatedQuotedField => fatal( + "row {d}: unterminated quoted field", + stderr_writer, + .csv_error, + .{csv_row_count + 1}, + ), + else => fatal( + "row {d}: failed to parse CSV", + stderr_writer, + .csv_error, + .{csv_row_count + 1}, + ), + } orelse break; + csv_row_count += 1; + defer csv_reader.freeRecord(record); + if (record.len == 0) continue; + data_row_count += 1; + } + + var count_buf: [32]u8 = undefined; + const count_str = fmtThousands(&count_buf, data_row_count); + + stdout_writer.print("OK: {s} rows, {d} columns (", .{ count_str, num_cols }) catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + + for (cols, types, 0..) |col, t, i| { + if (i > 0) { + stdout_writer.writeAll(", ") catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + } + stdout_writer.print("{s} {s}", .{ col, @tagName(t) }) catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + } + stdout_writer.writeAll(")\n") catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + }, + .json, .ndjson => { + // --validate is only meaningful for CSV/TSV input + fatal("--validate is only supported for CSV and TSV input", stderr_writer, .usage, .{}); + }, + } +} + /// run(parsed, allocator, io, stderr_writer, stdout_writer) → void /// Pre: parsed contains a valid query; allocator and writers are valid /// Post: input from stdin has been loaded (dispatched on parsed.input_format), @@ -1454,6 +1622,13 @@ pub fn main(init: std.process.Init.Minimal) void { stderr_writer.flush() catch |ferr| std.log.err("failed to flush: {}", .{ferr}); std.process.exit(@intFromEnum(ExitCode.usage)); }, + error.ValidateWithQuery => { + stderr_writer.writeAll("error: --validate cannot be combined with a query argument\n") catch |werr| { + std.log.err("failed to write error message: {}", .{werr}); + }; + stderr_writer.flush() catch |ferr| std.log.err("failed to flush: {}", .{ferr}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }, error.InvalidOutputPath => { stderr_writer.writeAll("error: --output requires a non-empty file path\n") catch |werr| { std.log.err("failed to write error message: {}", .{werr}); @@ -1501,6 +1676,15 @@ pub fn main(init: std.process.Init.Minimal) void { std.log.err("failed to flush stderr: {}", .{err}); }; }, + .validate => |val_args| { + runValidate(val_args, allocator, io.io(), stderr_writer, stdout_writer); + stdout_file_writer.flush() catch |err| { + std.log.err("failed to flush stdout: {}", .{err}); + }; + stderr_file_writer.flush() catch |err| { + std.log.err("failed to flush stderr: {}", .{err}); + }; + }, .parsed => |parsed| { if (parsed.output) |output_path| { const output_file = std.Io.Dir.createFile(std.Io.Dir.cwd(), io.io(), output_path, .{}) catch |err| { From baa67d9f6fb9d09c4ffc506520546a5f923c07d1 Mon Sep 17 00:00:00 2001 From: "Victor M. Varela" Date: Thu, 7 May 2026 12:56:16 +0200 Subject: [PATCH 2/3] feat: extend --validate to support JSON and NDJSON input formats --- README.md | 2 +- build.zig | 31 ++++++++++ docs/sql-pipe.1.scd | 12 ++-- src/main.zig | 145 ++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 178 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 8ff4f05..b0226e4 100644 --- a/README.md +++ b/README.md @@ -211,7 +211,7 @@ $ cat events.csv \ | `-H`, `--header` | Print column names as the first output row | | `--json` | Alias for `--output-format json` (mutually exclusive with `-H`) | | `--max-rows ` | Stop if more than `n` data rows are read (exit 1) | -| `--validate` | Parse the entire CSV input and print a summary (`OK: rows, columns (...)`) to stdout. Exit 0 on success, exit 2 on CSV error. No query required. Compatible with `--delimiter`, `--tsv`, `--no-type-inference`. | +| `--validate` | Parse the entire input and print a summary (`OK: rows, columns (col TYPE, ...)`) to stdout. Exit 0 on success, exit 2 on parse error. No query required. Compatible with `--delimiter`, `--tsv`, `--no-type-inference`, `-I`/`--input-format` (csv, tsv, json, ndjson). JSON/NDJSON columns are reported as TEXT. | | `--columns` | Read the CSV header row, print each column name on its own line, and exit 0. With `-v`/`--verbose`, also shows the inferred type per column (`name INTEGER`). Respects `--delimiter` and `--tsv`. Mutually exclusive with a query argument. | | `--output ` | Write results to the given file instead of stdout. Creates or overwrites the file. Exits 1 if the file cannot be created. | | `-v`, `--verbose` | Print `Loaded rows in s` to stderr after loading (always on TTY; forced with flag) | diff --git a/build.zig b/build.zig index 3df9858..070ab25 100644 --- a/build.zig +++ b/build.zig @@ -818,6 +818,37 @@ pub fn build(b: *std.Build) void { test_validate_with_query.step.dependOn(b.getInstallStep()); test_step.dependOn(&test_validate_with_query.step); + // Integration test 79: --validate on valid JSON array + const test_validate_json = b.addSystemCommand(&.{ + "bash", "-c", + \\result=$(printf '[{"id":1,"name":"Alice"},{"id":2,"name":"Bob"}]' \ + \\ | ./zig-out/bin/sql-pipe --validate -I json) + \\expected='OK: 2 rows, 2 columns (id TEXT, name TEXT)' + \\[ "$result" = "$expected" ] + }); + test_validate_json.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_validate_json.step); + + // Integration test 80: --validate on valid NDJSON + const test_validate_ndjson = b.addSystemCommand(&.{ + "bash", "-c", + \\result=$(printf '{"id":1,"name":"Alice"}\n{"id":2,"name":"Bob"}\n' \ + \\ | ./zig-out/bin/sql-pipe --validate -I ndjson) + \\expected='OK: 2 rows, 2 columns (id TEXT, name TEXT)' + \\[ "$result" = "$expected" ] + }); + test_validate_ndjson.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_validate_ndjson.step); + + // Integration test 81: --validate on invalid JSON exits 2 + const test_validate_json_error = b.addSystemCommand(&.{ + "bash", "-c", + \\msg=$(printf '[{"id":1, broken}]' | ./zig-out/bin/sql-pipe --validate -I json 2>&1 >/dev/null; echo "EXIT:$?") + \\echo "$msg" | grep -q 'EXIT:2' + }); + test_validate_json_error.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_validate_json_error.step); + // Unit tests for the RFC 4180 CSV parser (src/csv.zig) const unit_tests = b.addTest(.{ .root_module = b.createModule(.{ diff --git a/docs/sql-pipe.1.scd b/docs/sql-pipe.1.scd index 19358b5..7af41b4 100644 --- a/docs/sql-pipe.1.scd +++ b/docs/sql-pipe.1.scd @@ -71,12 +71,14 @@ OPTIONS terminals. Cannot be combined with *-v* / *--verbose*. *--validate* - Parse the entire CSV input without executing a SQL query. On - success, prints a one-line summary to standard output: + Parse the entire input without executing a SQL query. On success, + prints a one-line summary to standard output: *OK: rows, columns ( , ...)* and exits 0. - On CSV parse error, prints the error message and exits 2. - Compatible with *--delimiter*, *--tsv*, and - *--no-type-inference*. Mutually exclusive with a query argument. + On parse error, prints the error message and exits 2. Compatible + with *--delimiter*, *--tsv*, *--no-type-inference*, and + *-I* / *--input-format* (csv, tsv, json, ndjson). JSON and NDJSON + columns are reported as TEXT. Mutually exclusive with a query + argument. *--columns* Read the CSV header row, print each column name on its own line to diff --git a/src/main.zig b/src/main.zig index 3f215f1..367f483 100644 --- a/src/main.zig +++ b/src/main.zig @@ -151,10 +151,10 @@ fn printUsage(writer: *std.Io.Writer) !void { \\ With --columns: show inferred type per column \\ -s, --silent Suppress row count output unconditionally \\ Cannot be combined with -v/--verbose - \\ --validate Parse the entire CSV input and print a summary to stdout + \\ --validate Parse the entire input and print a summary to stdout \\ (OK: rows, columns ( , ...)) - \\ Exit 0 on success, exit 2 on CSV error. No query required. - \\ Compatible with --delimiter, --tsv, --no-type-inference. + \\ Exit 0 on success, exit 2 on parse error. No query required. + \\ Compatible with --delimiter, --tsv, --no-type-inference, -I. \\ --columns List column names from input header (one per line) and exit \\ Combine with -v/--verbose to include inferred types \\ Cannot be combined with --output or a query argument @@ -1484,9 +1484,142 @@ fn runValidate( std.process.exit(@intFromEnum(ExitCode.usage)); }; }, - .json, .ndjson => { - // --validate is only meaningful for CSV/TSV input - fatal("--validate is only supported for CSV and TSV input", stderr_writer, .usage, .{}); + .json => { + var stdin_buf: [4096]u8 = undefined; + var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); + + var buf: std.ArrayList(u8) = .empty; + defer buf.deinit(allocator); + while (true) { + const byte = stdin_file_reader.interface.takeByte() catch |err| switch (err) { + error.EndOfStream => break, + error.ReadFailed => fatal("failed to read JSON input", stderr_writer, .csv_error, .{}), + }; + buf.append(allocator, byte) catch fatal("out of memory reading JSON", stderr_writer, .csv_error, .{}); + } + if (buf.items.len == 0) fatal("empty input", stderr_writer, .csv_error, .{}); + + var parsed = std.json.parseFromSlice(std.json.Value, allocator, buf.items, .{}) catch + fatal("failed to parse JSON input", stderr_writer, .csv_error, .{}); + defer parsed.deinit(); + + const array = switch (parsed.value) { + .array => |a| a, + else => fatal("JSON input must be an array of objects", stderr_writer, .csv_error, .{}), + }; + if (array.items.len == 0) fatal("empty JSON array: cannot determine column names", stderr_writer, .csv_error, .{}); + + const first_obj = switch (array.items[0]) { + .object => |o| o, + else => fatal("JSON array elements must be objects", stderr_writer, .csv_error, .{}), + }; + + var num_cols: usize = 0; + var ki = first_obj.iterator(); + while (ki.next()) |_| num_cols += 1; + + var count_buf: [32]u8 = undefined; + const count_str = fmtThousands(&count_buf, array.items.len); + stdout_writer.print("OK: {s} rows, {d} columns (", .{ count_str, num_cols }) catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + ki = first_obj.iterator(); + var col_i: usize = 0; + while (ki.next()) |entry| : (col_i += 1) { + if (col_i > 0) stdout_writer.writeAll(", ") catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + stdout_writer.print("{s} TEXT", .{entry.key_ptr.*}) catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + } + stdout_writer.writeAll(")\n") catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + }, + .ndjson => { + var stdin_buf: [4096]u8 = undefined; + var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); + + var line_num: usize = 0; + var row_count: usize = 0; + var cols_owned: ?[][]u8 = null; + defer if (cols_owned) |cs| { + for (cs) |col| allocator.free(col); + allocator.free(cs); + }; + + while (true) { + line_num += 1; + const line = json.readLine(allocator, &stdin_file_reader.interface) catch |err| switch (err) { + error.OutOfMemory => fatal("out of memory reading NDJSON", stderr_writer, .csv_error, .{}), + error.ReadFailed => fatal("line {d}: failed to read NDJSON", stderr_writer, .csv_error, .{line_num}), + } orelse break; + defer allocator.free(line); + + const trimmed = std.mem.trim(u8, line, " \t\r"); + if (trimmed.len == 0) { + line_num -= 1; + continue; + } + + var parsed_line = std.json.parseFromSlice(std.json.Value, allocator, trimmed, .{}) catch + fatal("line {d}: failed to parse NDJSON", stderr_writer, .csv_error, .{line_num}); + defer parsed_line.deinit(); + + const obj = switch (parsed_line.value) { + .object => |o| o, + else => fatal("line {d}: NDJSON element must be a JSON object", stderr_writer, .csv_error, .{line_num}), + }; + + if (cols_owned == null) { + var col_list: std.ArrayList([]u8) = .empty; + errdefer { + for (col_list.items) |col| allocator.free(col); + col_list.deinit(allocator); + } + var ki = obj.iterator(); + while (ki.next()) |entry| { + const owned_key = allocator.dupe(u8, entry.key_ptr.*) catch + fatal("out of memory building column list", stderr_writer, .csv_error, .{}); + col_list.append(allocator, owned_key) catch + fatal("out of memory building column list", stderr_writer, .csv_error, .{}); + } + if (col_list.items.len == 0) + fatal("line 1: first NDJSON object has no keys", stderr_writer, .csv_error, .{}); + cols_owned = col_list.toOwnedSlice(allocator) catch + fatal("out of memory", stderr_writer, .csv_error, .{}); + } + row_count += 1; + } + + if (cols_owned == null) fatal("empty NDJSON input", stderr_writer, .csv_error, .{}); + + const cols = cols_owned.?; + var count_buf: [32]u8 = undefined; + const count_str = fmtThousands(&count_buf, row_count); + stdout_writer.print("OK: {s} rows, {d} columns (", .{ count_str, cols.len }) catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + for (cols, 0..) |col, i| { + if (i > 0) stdout_writer.writeAll(", ") catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + stdout_writer.print("{s} TEXT", .{col}) catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + } + stdout_writer.writeAll(")\n") catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; }, } } From 6c1efa1aff6805fd75d10c83c1d59eb0f7fac220 Mon Sep 17 00:00:00 2001 From: "Victor M. Varela" Date: Thu, 7 May 2026 13:04:42 +0200 Subject: [PATCH 3/3] fix: enforce --validate mutual exclusion with --output and --columns --- build.zig | 27 +++++++++++++++++++++++++++ src/main.zig | 32 ++++++++++++++++++++++++++++---- 2 files changed, 55 insertions(+), 4 deletions(-) diff --git a/build.zig b/build.zig index 070ab25..15b5541 100644 --- a/build.zig +++ b/build.zig @@ -849,6 +849,33 @@ pub fn build(b: *std.Build) void { test_validate_json_error.step.dependOn(b.getInstallStep()); test_step.dependOn(&test_validate_json_error.step); + // Integration test 82: --validate --output exits 1 with error + const test_validate_output_conflict = b.addSystemCommand(&.{ + "bash", "-c", + \\msg=$(printf 'a,b\n1,2\n' | ./zig-out/bin/sql-pipe --validate --output /tmp/x 2>&1 >/dev/null; echo "EXIT:$?") + \\echo "$msg" | grep -q 'error: --output cannot be combined with --validate' && echo "$msg" | grep -q 'EXIT:1' + }); + test_validate_output_conflict.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_validate_output_conflict.step); + + // Integration test 83: --validate --columns exits 1 with error + const test_validate_columns_conflict = b.addSystemCommand(&.{ + "bash", "-c", + \\msg=$(printf 'a,b\n1,2\n' | ./zig-out/bin/sql-pipe --validate --columns 2>&1 >/dev/null; echo "EXIT:$?") + \\echo "$msg" | grep -q 'error: --validate cannot be combined with --columns' && echo "$msg" | grep -q 'EXIT:1' + }); + test_validate_columns_conflict.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_validate_columns_conflict.step); + + // Integration test 84: --validate on invalid NDJSON exits 2 + const test_validate_ndjson_error = b.addSystemCommand(&.{ + "bash", "-c", + \\msg=$(printf '{"id":1}\n{broken}\n' | ./zig-out/bin/sql-pipe --validate -I ndjson 2>&1 >/dev/null; echo "EXIT:$?") + \\echo "$msg" | grep -q 'EXIT:2' + }); + test_validate_ndjson_error.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_validate_ndjson_error.step); + // Unit tests for the RFC 4180 CSV parser (src/csv.zig) const unit_tests = b.addTest(.{ .root_module = b.createModule(.{ diff --git a/src/main.zig b/src/main.zig index 367f483..51a3017 100644 --- a/src/main.zig +++ b/src/main.zig @@ -21,6 +21,8 @@ const SqlPipeError = error{ SilentVerboseConflict, ColumnsWithQuery, ValidateWithQuery, + ValidateWithColumns, + OutputWithValidate, InvalidMaxRows, InvalidInputFormat, InvalidOutputFormat, @@ -122,7 +124,7 @@ const ArgsResult = union(enum) { version, /// User requested --columns: list column names and exit. columns: ColumnsArgs, - /// User requested --validate: parse CSV and print summary. + /// User requested --validate: parse input and print summary. validate: ValidateArgs, }; @@ -325,6 +327,14 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult { if (output != null and list_columns) return error.OutputWithColumns; + // --output is mutually exclusive with --validate (--validate always writes to stdout) + if (output != null and validate) + return error.OutputWithValidate; + + // --validate is mutually exclusive with --columns + if (validate and list_columns) + return error.ValidateWithColumns; + // --columns is mutually exclusive with a query argument if (list_columns and query != null) return error.ColumnsWithQuery; @@ -1357,9 +1367,9 @@ fn runColumns( /// runValidate(args, allocator, io, stderr_writer, stdout_writer) → void /// Pre: args is valid; allocator and writers are valid -/// Post: the entire CSV/TSV input has been parsed; on success prints -/// "OK: rows, columns ( , ...)" to stdout and exits 0. -/// On CSV parse error, prints the error message to stderr and exits 2. +/// Post: the entire input has been parsed (CSV, TSV, JSON, or NDJSON); +/// on success prints "OK: rows, columns ( , ...)" to stdout. +/// On parse error, prints the error message to stderr and exits 2. fn runValidate( args: ValidateArgs, allocator: std.mem.Allocator, @@ -1776,6 +1786,20 @@ pub fn main(init: std.process.Init.Minimal) void { stderr_writer.flush() catch |ferr| std.log.err("failed to flush: {}", .{ferr}); std.process.exit(@intFromEnum(ExitCode.usage)); }, + error.OutputWithValidate => { + stderr_writer.writeAll("error: --output cannot be combined with --validate\n") catch |werr| { + std.log.err("failed to write error message: {}", .{werr}); + }; + stderr_writer.flush() catch |ferr| std.log.err("failed to flush: {}", .{ferr}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }, + error.ValidateWithColumns => { + stderr_writer.writeAll("error: --validate cannot be combined with --columns\n") catch |werr| { + std.log.err("failed to write error message: {}", .{werr}); + }; + stderr_writer.flush() catch |ferr| std.log.err("failed to flush: {}", .{ferr}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }, else => {}, } printUsage(stderr_writer) catch |werr| {