diff --git a/build.zig b/build.zig
index 1ac00a5..c723f2f 100644
--- a/build.zig
+++ b/build.zig
@@ -1066,9 +1066,10 @@ pub fn build(b: *std.Build) void {
test_xml_validate.step.dependOn(b.getInstallStep());
test_step.dependOn(&test_xml_validate.step);
- // Integration test 104: --xml-root and --xml-row customize element names
+ // Integration test 104: --xml-root and --xml-row customize element names for output
const test_xml_custom_elements = b.addSystemCommand(&.{
"bash", "-c",
+ // Output: custom element names appear in the XML
\\result=$(printf 'name,age\nAlice,30\n' \
\\ | ./zig-out/bin/sql-pipe -O xml --xml-root data --xml-row record 'SELECT * FROM t')
\\echo "$result" | grep -q '' && echo "$result" | grep -q '' && echo "$result" | grep -q ''
@@ -1108,7 +1109,7 @@ pub fn build(b: *std.Build) void {
// Integration test 108: Root sin rows → error con "no row elements"
const test_xml_no_rows = b.addSystemCommand(&.{
"bash", "-c",
- \\msg=$(printf '' | ./zig-out/bin/sql-pipe -I xml 'SELECT 1' 2>&1; echo "EXIT:$?")
+ \\msg=$(printf '' | ./zig-out/bin/sql-pipe -I xml 'SELECT 1' 2>&1; echo "EXIT:$?")
\\echo "$msg" | grep -q 'no row elements' && echo "$msg" | grep -qv 'EXIT:0'
});
test_xml_no_rows.step.dependOn(b.getInstallStep());
@@ -1162,6 +1163,84 @@ pub fn build(b: *std.Build) void {
test_xml_float_as_int.step.dependOn(b.getInstallStep());
test_step.dependOn(&test_xml_float_as_int.step);
+ // Integration test 114: --xml-root navigates nested XML for input (RSS-like structure)
+ const test_xml_nested_navigation = b.addSystemCommand(&.{
+ "bash", "-c",
+ // Feed with an RSS-like root > channel > item structure; --xml-root channel --xml-row item
+ // selects only item elements from inside channel, skipping the channel's other children (such as the feed title).
+ \\doc='My Feed
- Alice30
- Bob25
'
+ \\result=$(printf '%s' "$doc" \
+ \\ | ./zig-out/bin/sql-pipe -I xml --xml-root channel --xml-row item \
+ \\ 'SELECT name || ":" || age FROM t ORDER BY name')
+ \\[ "$result" = "$(printf 'Alice:30\nBob:25')" ]
+ });
+ test_xml_nested_navigation.step.dependOn(b.getInstallStep());
+ test_step.dependOn(&test_xml_nested_navigation.step);
+
+ // Integration test 115: --xml-root / --xml-row with --validate counts only matching rows
+ const test_xml_nested_validate = b.addSystemCommand(&.{
+ "bash", "-c",
+ \\doc='T- 1
- 2
'
+ \\result=$(printf '%s' "$doc" \
+ \\ | ./zig-out/bin/sql-pipe -I xml --xml-root channel --xml-row item --validate)
+ \\echo "$result" | grep -q 'OK: 2 rows'
+ });
+ test_xml_nested_validate.step.dependOn(b.getInstallStep());
+ test_step.dependOn(&test_xml_nested_validate.step);
+
+ // Integration test 116: --xml-root alone (no --xml-row) navigates to container
+ const test_xml_root_alone = b.addSystemCommand(&.{
+ "bash", "-c",
+ \\doc='Alice
Bob
'
+ \\result=$(printf '%s' "$doc" \
+ \\ | ./zig-out/bin/sql-pipe -I xml --xml-root data 'SELECT name FROM t ORDER BY name')
+ \\[ "$result" = "$(printf 'Alice\nBob')" ]
+ });
+ test_xml_root_alone.step.dependOn(b.getInstallStep());
+ test_step.dependOn(&test_xml_root_alone.step);
+
+ // Integration test 117: --xml-row alone (no --xml-root) filters rows by tag
+ const test_xml_row_alone = b.addSystemCommand(&.{
+ "bash", "-c",
+ \\doc='- Alice
1- Bob
'
+ \\result=$(printf '%s' "$doc" \
+ \\ | ./zig-out/bin/sql-pipe -I xml --xml-row item 'SELECT name FROM t ORDER BY name')
+ \\[ "$result" = "$(printf 'Alice\nBob')" ]
+ });
+ test_xml_row_alone.step.dependOn(b.getInstallStep());
+ test_step.dependOn(&test_xml_row_alone.step);
+
+ // Integration test 118: --xml-row with no matching elements exits non-zero with clear message
+ const test_xml_row_no_match = b.addSystemCommand(&.{
+ "bash", "-c",
+ \\msg=$(printf 'Alice
' \
+ \\ | ./zig-out/bin/sql-pipe -I xml --xml-row wrong 'SELECT 1' 2>&1; echo "EXIT:$?")
+ \\echo "$msg" | grep -q "'wrong'" && echo "$msg" | grep -q 'check --xml-row' && echo "$msg" | grep -qv 'EXIT:0'
+ });
+ test_xml_row_no_match.step.dependOn(b.getInstallStep());
+ test_step.dependOn(&test_xml_row_no_match.step);
+
+ // Integration test 119: --columns with --xml-root and --xml-row
+ const test_xml_columns_with_flags = b.addSystemCommand(&.{
+ "bash", "-c",
+ \\doc='- Alice30
'
+ \\result=$(printf '%s' "$doc" \
+ \\ | ./zig-out/bin/sql-pipe -I xml --xml-root channel --xml-row item --columns)
+ \\[ "$result" = "$(printf 'name\nage')" ]
+ });
+ test_xml_columns_with_flags.step.dependOn(b.getInstallStep());
+ test_step.dependOn(&test_xml_columns_with_flags.step);
+
+ // Integration test 120: --xml-root matching the actual document root (fast path)
+ const test_xml_root_fast_path = b.addSystemCommand(&.{
+ "bash", "-c",
+ \\result=$(printf 'Alice
' \
+ \\ | ./zig-out/bin/sql-pipe -I xml --xml-root results 'SELECT name FROM t')
+ \\[ "$result" = "Alice" ]
+ });
+ test_xml_root_fast_path.step.dependOn(b.getInstallStep());
+ test_step.dependOn(&test_xml_root_fast_path.step);
+
// Unit tests for the RFC 4180 CSV parser (src/csv.zig)
const unit_tests = b.addTest(.{
.root_module = b.createModule(.{
diff --git a/src/main.zig b/src/main.zig
index 9f2e132..086d36e 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -105,6 +105,10 @@ const ParsedArgs = struct {
xml_root: []const u8,
/// Row element name for XML output (default: "row").
xml_row: []const u8,
+ /// Root element to navigate to for XML input; null = use actual document root.
+ xml_root_input: ?[]const u8,
+ /// Row tag filter for XML input; null = accept any direct child element as a row.
+ xml_row_input: ?[]const u8,
};
/// Arguments for `--columns` mode.
@@ -115,6 +119,10 @@ const ColumnsArgs = struct {
verbose: bool,
/// Input format (default: csv).
input_format: InputFormat,
+ /// Root element to navigate to for XML input; null = use actual document root.
+ xml_root_input: ?[]const u8,
+ /// Row tag filter for XML input; null = accept any direct child element as a row.
+ xml_row_input: ?[]const u8,
};
/// Arguments for `--validate` mode.
@@ -125,6 +133,10 @@ const ValidateArgs = struct {
type_inference: bool,
/// Input format (default: csv).
input_format: InputFormat,
+ /// Root element to navigate to for XML input; null = use actual document root.
+ xml_root_input: ?[]const u8,
+ /// Row tag filter for XML input; null = accept any direct child element as a row.
+ xml_row_input: ?[]const u8,
};
/// Arguments for `--sample` mode.
@@ -298,6 +310,8 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult {
var output: ?[]const u8 = null;
var xml_root: []const u8 = "results";
var xml_row: []const u8 = "row";
+ var xml_root_input: ?[]const u8 = null;
+ var xml_row_input: ?[]const u8 = null;
var sample_mode = false;
var sample_n: usize = 10;
@@ -400,14 +414,18 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult {
i += 1;
if (i >= args.len) return error.MissingXmlFlagValue;
xml_root = args[i];
+ xml_root_input = args[i];
} else if (std.mem.startsWith(u8, arg, "--xml-root=")) {
xml_root = arg["--xml-root=".len..];
+ xml_root_input = arg["--xml-root=".len..];
} else if (std.mem.eql(u8, arg, "--xml-row")) {
i += 1;
if (i >= args.len) return error.MissingXmlFlagValue;
xml_row = args[i];
+ xml_row_input = args[i];
} else if (std.mem.startsWith(u8, arg, "--xml-row=")) {
xml_row = arg["--xml-row=".len..];
+ xml_row_input = arg["--xml-row=".len..];
} else {
if (query == null) query = arg;
}
@@ -471,6 +489,8 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult {
.delimiter = delimiter,
.verbose = verbose,
.input_format = input_format,
+ .xml_root_input = xml_root_input,
+ .xml_row_input = xml_row_input,
} };
// --validate mode: parse CSV and print summary
@@ -479,6 +499,8 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult {
.delimiter = delimiter,
.type_inference = type_inference,
.input_format = input_format,
+ .xml_root_input = xml_root_input,
+ .xml_row_input = xml_row_input,
} };
// --sample mode: print schema + first n rows and exit
@@ -503,6 +525,8 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult {
.output = output,
.xml_root = xml_root,
.xml_row = xml_row,
+ .xml_root_input = xml_root_input,
+ .xml_row_input = xml_row_input,
} };
}
@@ -1509,7 +1533,7 @@ fn runColumns(
var stdin_buf: [4096]u8 = undefined;
var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf);
- const names = xml.getXmlColumnNames(allocator, &stdin_file_reader.interface, stderr_writer);
+ const names = xml.getXmlColumnNames(allocator, &stdin_file_reader.interface, args.xml_root_input, args.xml_row_input, stderr_writer);
defer {
for (names) |name| allocator.free(name);
allocator.free(names);
@@ -1799,7 +1823,7 @@ fn runValidate(
var stdin_buf: [4096]u8 = undefined;
var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf);
- const summary = xml.summarizeXml(allocator, &stdin_file_reader.interface, stderr_writer);
+ const summary = xml.summarizeXml(allocator, &stdin_file_reader.interface, args.xml_root_input, args.xml_row_input, stderr_writer);
defer {
for (summary.col_names) |name| allocator.free(name);
allocator.free(summary.col_names);
@@ -2019,7 +2043,7 @@ fn run(
.xml => blk: {
var stdin_buf: [4096]u8 = undefined;
var stdin_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf);
- break :blk xml.loadXmlInput(allocator, &stdin_reader.interface, db, parsed.max_rows, stderr_writer);
+ break :blk xml.loadXmlInput(allocator, &stdin_reader.interface, db, parsed.xml_root_input, parsed.xml_row_input, parsed.max_rows, stderr_writer);
},
};
diff --git a/src/xml.zig b/src/xml.zig
index 399e2f2..95e99ad 100644
--- a/src/xml.zig
+++ b/src/xml.zig
@@ -223,7 +223,7 @@ pub fn writeXmlFooter(writer: *std.Io.Writer, root_name: []const u8) !void {
/// var p = XmlParser.init(data);
/// p.skipPrologue(err_writer);
/// const root = p.readRootOpen(err_writer);
-/// while (try p.nextRow(allocator, root, err_writer)) |cols| {
+/// while (try p.nextRow(allocator, root, null, err_writer)) |cols| {
/// defer { for (cols) |col| { if (col.value) |v| allocator.free(v); } allocator.free(cols); }
/// // use cols[i].name and cols[i].value
/// }
@@ -449,6 +449,63 @@ pub const XmlParser = struct {
self.fatalAt("unexpected end of input: unclosed element '{s}'", err_writer, .{elem_name});
}
+ // ─── Element skip ────────────────────────────────────
+
+ /// Skip the body and closing tag of an element.
+ ///
+ /// Pre: positioned just after the element's opening tag '>'
+ /// Post: positioned just after the element's closing tag '</tag>'
+ /// properly handles nested elements, comments, CDATA, and PIs
+ fn skipElementBody(self: *XmlParser, tag: []const u8, err_writer: *std.Io.Writer) void {
+ // depth counts unclosed nested elements inside the one we are skipping
+ var depth: usize = 0;
+ // Loop invariant: depth = number of open nested elements not yet closed
+ // Bounding function: self.data.len - self.pos (finite input)
+ while (true) {
+ const ch = self.peek() orelse break;
+ if (ch != '<') {
+ self.advance();
+ continue;
+ }
+ if (self.startsWith("