diff --git a/build.zig b/build.zig index 1ac00a5..c723f2f 100644 --- a/build.zig +++ b/build.zig @@ -1066,9 +1066,10 @@ pub fn build(b: *std.Build) void { test_xml_validate.step.dependOn(b.getInstallStep()); test_step.dependOn(&test_xml_validate.step); - // Integration test 104: --xml-root and --xml-row customize element names + // Integration test 104: --xml-root and --xml-row customize element names for output const test_xml_custom_elements = b.addSystemCommand(&.{ "bash", "-c", + // Output: custom element names appear in the XML \\result=$(printf 'name,age\nAlice,30\n' \ \\ | ./zig-out/bin/sql-pipe -O xml --xml-root data --xml-row record 'SELECT * FROM t') \\echo "$result" | grep -q '' && echo "$result" | grep -q '' && echo "$result" | grep -q '' @@ -1108,7 +1109,7 @@ pub fn build(b: *std.Build) void { // Integration test 108: Root sin rows → error con "no row elements" const test_xml_no_rows = b.addSystemCommand(&.{ "bash", "-c", - \\msg=$(printf '' | ./zig-out/bin/sql-pipe -I xml 'SELECT 1' 2>&1; echo "EXIT:$?") + \\msg=$(printf '' | ./zig-out/bin/sql-pipe -I xml 'SELECT 1' 2>&1; echo "EXIT:$?") \\echo "$msg" | grep -q 'no row elements' && echo "$msg" | grep -qv 'EXIT:0' }); test_xml_no_rows.step.dependOn(b.getInstallStep()); @@ -1162,6 +1163,84 @@ pub fn build(b: *std.Build) void { test_xml_float_as_int.step.dependOn(b.getInstallStep()); test_step.dependOn(&test_xml_float_as_int.step); + // Integration test 114: --xml-root navigates nested XML for input (RSS-like structure) + const test_xml_nested_navigation = b.addSystemCommand(&.{ + "bash", "-c", + // Feed with structure; --xml-root channel --xml-row item + // selects only item elements from inside channel, skipping etc. + \\doc='<feed><channel><title>My FeedAlice30Bob25' + \\result=$(printf '%s' "$doc" \ + \\ | ./zig-out/bin/sql-pipe -I xml --xml-root channel --xml-row item \ + \\ 'SELECT name || ":" || age FROM t ORDER BY name') + \\[ "$result" = "$(printf 'Alice:30\nBob:25')" ] + }); + test_xml_nested_navigation.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_xml_nested_navigation.step); + + // Integration test 115: --xml-root / --xml-row with --validate counts only matching rows + const test_xml_nested_validate = b.addSystemCommand(&.{ + "bash", "-c", + \\doc='T12' + \\result=$(printf '%s' "$doc" \ + \\ | ./zig-out/bin/sql-pipe -I xml --xml-root channel --xml-row item --validate) + \\echo "$result" | grep -q 'OK: 2 rows' + }); + test_xml_nested_validate.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_xml_nested_validate.step); + + // Integration test 116: --xml-root alone (no --xml-row) navigates to container + const test_xml_root_alone = b.addSystemCommand(&.{ + "bash", "-c", + \\doc='AliceBob' + \\result=$(printf '%s' "$doc" \ + \\ | ./zig-out/bin/sql-pipe -I xml --xml-root data 'SELECT name FROM t ORDER BY name') + \\[ "$result" = "$(printf 'Alice\nBob')" ] + }); + test_xml_root_alone.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_xml_root_alone.step); + + // Integration test 117: --xml-row alone (no --xml-root) filters rows by tag + const test_xml_row_alone = b.addSystemCommand(&.{ + "bash", "-c", + \\doc='Alice1Bob' + \\result=$(printf '%s' "$doc" \ + \\ | ./zig-out/bin/sql-pipe -I xml --xml-row item 'SELECT name FROM t ORDER BY name') + \\[ "$result" = "$(printf 'Alice\nBob')" ] + }); + test_xml_row_alone.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_xml_row_alone.step); + + // Integration test 118: --xml-row with no matching elements exits non-zero with clear message + const test_xml_row_no_match = b.addSystemCommand(&.{ + "bash", "-c", + \\msg=$(printf 'Alice' \ + \\ | ./zig-out/bin/sql-pipe -I xml --xml-row wrong 'SELECT 1' 2>&1; echo "EXIT:$?") + \\echo "$msg" | grep -q "'wrong'" && echo "$msg" | grep -q 'check --xml-row' && echo "$msg" | grep -qv 'EXIT:0' + }); + test_xml_row_no_match.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_xml_row_no_match.step); + + // Integration test 119: --columns with --xml-root and --xml-row + const test_xml_columns_with_flags = b.addSystemCommand(&.{ + "bash", "-c", + \\doc='Alice30' + \\result=$(printf '%s' "$doc" \ + \\ | ./zig-out/bin/sql-pipe -I xml --xml-root channel --xml-row item --columns) + \\[ "$result" = "$(printf 'name\nage')" ] + }); + test_xml_columns_with_flags.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_xml_columns_with_flags.step); + + // Integration test 120: --xml-root matching the actual document root (fast path) + const test_xml_root_fast_path = b.addSystemCommand(&.{ + "bash", "-c", + \\result=$(printf 'Alice' \ + \\ | ./zig-out/bin/sql-pipe -I xml --xml-root results 'SELECT name FROM t') + \\[ "$result" = "Alice" ] + }); + test_xml_root_fast_path.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_xml_root_fast_path.step); + // Unit tests for the RFC 4180 CSV parser (src/csv.zig) const unit_tests = b.addTest(.{ .root_module = b.createModule(.{ diff --git a/src/main.zig b/src/main.zig index 9f2e132..086d36e 100644 --- a/src/main.zig +++ b/src/main.zig @@ -105,6 +105,10 @@ const ParsedArgs = struct { xml_root: []const u8, /// Row element name for XML output (default: "row"). xml_row: []const u8, + /// Root element to navigate to for XML input; null = use actual document root. + xml_root_input: ?[]const u8, + /// Row tag filter for XML input; null = accept any direct child element as a row. + xml_row_input: ?[]const u8, }; /// Arguments for `--columns` mode. @@ -115,6 +119,10 @@ const ColumnsArgs = struct { verbose: bool, /// Input format (default: csv). input_format: InputFormat, + /// Root element to navigate to for XML input; null = use actual document root. + xml_root_input: ?[]const u8, + /// Row tag filter for XML input; null = accept any direct child element as a row. + xml_row_input: ?[]const u8, }; /// Arguments for `--validate` mode. @@ -125,6 +133,10 @@ const ValidateArgs = struct { type_inference: bool, /// Input format (default: csv). input_format: InputFormat, + /// Root element to navigate to for XML input; null = use actual document root. + xml_root_input: ?[]const u8, + /// Row tag filter for XML input; null = accept any direct child element as a row. + xml_row_input: ?[]const u8, }; /// Arguments for `--sample` mode. @@ -298,6 +310,8 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult { var output: ?[]const u8 = null; var xml_root: []const u8 = "results"; var xml_row: []const u8 = "row"; + var xml_root_input: ?[]const u8 = null; + var xml_row_input: ?[]const u8 = null; var sample_mode = false; var sample_n: usize = 10; @@ -400,14 +414,18 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult { i += 1; if (i >= args.len) return error.MissingXmlFlagValue; xml_root = args[i]; + xml_root_input = args[i]; } else if (std.mem.startsWith(u8, arg, "--xml-root=")) { xml_root = arg["--xml-root=".len..]; + xml_root_input = arg["--xml-root=".len..]; } else if (std.mem.eql(u8, arg, "--xml-row")) { i += 1; if (i >= args.len) return error.MissingXmlFlagValue; xml_row = args[i]; + xml_row_input = args[i]; } else if (std.mem.startsWith(u8, arg, "--xml-row=")) { xml_row = arg["--xml-row=".len..]; + xml_row_input = arg["--xml-row=".len..]; } else { if (query == null) query = arg; } @@ -471,6 +489,8 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult { .delimiter = delimiter, .verbose = verbose, .input_format = input_format, + .xml_root_input = xml_root_input, + .xml_row_input = xml_row_input, } }; // --validate mode: parse CSV and print summary @@ -479,6 +499,8 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult { .delimiter = delimiter, .type_inference = type_inference, .input_format = input_format, + .xml_root_input = xml_root_input, + .xml_row_input = xml_row_input, } }; // --sample mode: print schema + first n rows and exit @@ -503,6 +525,8 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult { .output = output, .xml_root = xml_root, .xml_row = xml_row, + .xml_root_input = xml_root_input, + .xml_row_input = xml_row_input, } }; } @@ -1509,7 +1533,7 @@ fn runColumns( var stdin_buf: [4096]u8 = undefined; var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); - const names = xml.getXmlColumnNames(allocator, &stdin_file_reader.interface, stderr_writer); + const names = xml.getXmlColumnNames(allocator, &stdin_file_reader.interface, args.xml_root_input, args.xml_row_input, stderr_writer); defer { for (names) |name| allocator.free(name); allocator.free(names); @@ -1799,7 +1823,7 @@ fn runValidate( var stdin_buf: [4096]u8 = undefined; var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); - const summary = xml.summarizeXml(allocator, &stdin_file_reader.interface, stderr_writer); + const summary = xml.summarizeXml(allocator, &stdin_file_reader.interface, args.xml_root_input, args.xml_row_input, stderr_writer); defer { for (summary.col_names) |name| allocator.free(name); allocator.free(summary.col_names); @@ -2019,7 +2043,7 @@ fn run( .xml => blk: { var stdin_buf: [4096]u8 = undefined; var stdin_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); - break :blk xml.loadXmlInput(allocator, &stdin_reader.interface, db, parsed.max_rows, stderr_writer); + break :blk xml.loadXmlInput(allocator, &stdin_reader.interface, db, parsed.xml_root_input, parsed.xml_row_input, parsed.max_rows, stderr_writer); }, }; diff --git a/src/xml.zig b/src/xml.zig index 399e2f2..95e99ad 100644 --- a/src/xml.zig +++ b/src/xml.zig @@ -223,7 +223,7 @@ pub fn writeXmlFooter(writer: *std.Io.Writer, root_name: []const u8) !void { /// var p = XmlParser.init(data); /// p.skipPrologue(err_writer); /// const root = p.readRootOpen(err_writer); -/// while (try p.nextRow(allocator, root, err_writer)) |cols| { +/// while (try p.nextRow(allocator, root, null, err_writer)) |cols| { /// defer { for (cols) |col| { if (col.value) |v| allocator.free(v); } allocator.free(cols); } /// // use cols[i].name and cols[i].value /// } @@ -449,6 +449,63 @@ pub const XmlParser = struct { self.fatalAt("unexpected end of input: unclosed element '{s}'", err_writer, .{elem_name}); } + // ─── Element skip ──────────────────────────────────── + + /// Skip the body and closing tag of an element. + /// + /// Pre: positioned just after the element's opening tag '>' + /// Post: positioned just after the element's closing '' + /// properly handles nested elements, comments, CDATA, and PIs + fn skipElementBody(self: *XmlParser, tag: []const u8, err_writer: *std.Io.Writer) void { + // depth counts unclosed nested elements inside the one we are skipping + var depth: usize = 0; + // Loop invariant: depth = number of open nested elements not yet closed + // Bounding function: self.data.len - self.pos (finite input) + while (true) { + const ch = self.peek() orelse break; + if (ch != '<') { + self.advance(); + continue; + } + if (self.startsWith("