From 699f16a2b9e47c28616a0f5b63ec3ca65a7cbf19 Mon Sep 17 00:00:00 2001 From: Kanthi Subramanian Date: Wed, 23 Apr 2025 20:03:38 -0400 Subject: [PATCH 1/7] Use classes to create describe metadata instead of stringBuilder. --- ice/pom.xml | 13 ++ .../altinity/ice/internal/cmd/Describe.java | 217 +++++++++--------- 2 files changed, 124 insertions(+), 106 deletions(-) diff --git a/ice/pom.xml b/ice/pom.xml index 37f81bad..5014aead 100644 --- a/ice/pom.xml +++ b/ice/pom.xml @@ -14,6 +14,7 @@ 0.9.28 + 1.18.30 @@ -387,6 +388,13 @@ 7.9.0 test + + + org.projectlombok + lombok + ${lombok.version} + provided + @@ -404,6 +412,11 @@ picocli-codegen 4.7.6 + + org.projectlombok + lombok + ${lombok.version} + diff --git a/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java b/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java index 7f390cb7..8b014267 100644 --- a/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java +++ b/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java @@ -1,14 +1,15 @@ package com.altinity.ice.internal.cmd; -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.databind.JsonNode; +import com.altinity.ice.internal.model.TableMetadata; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; +import com.fasterxml.jackson.dataformat.yaml.YAMLGenerator; import java.io.IOException; import java.nio.ByteBuffer; import java.time.Instant; import java.time.ZoneId; import java.time.format.DateTimeFormatter; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -21,16 +22,13 @@ import org.apache.iceberg.types.Types; public final class Describe { - private Describe() {} - // TODO: refactor: the use of StringBuilder below is absolutely criminal public static void run(RESTCatalog catalog, String target, boolean json, boolean includeMetrics) throws IOException { String targetNamespace = null; String targetTable = null; if (target != null && !target.isEmpty()) { - // TODO: support catalog.ns.table var s = target.split("[.]", 2); switch (s.length) { case 2: @@ -42,138 +40,145 @@ public static void run(RESTCatalog catalog, String target, boolean json, boolean break; } } - // FIXME: there is no need to list nss/tables when target is given - var sb = new StringBuilder(); + + List tables = new ArrayList<>(); List namespaces = catalog.listNamespaces(); + for (Namespace namespace : namespaces) { if (targetNamespace != null && !targetNamespace.equals(namespace.toString())) { continue; } - List tables = catalog.listTables(namespace); - for (TableIdentifier tableId : tables) { + + List tableIds = catalog.listTables(namespace); + for (TableIdentifier tableId : tableIds) { if (targetTable != null && !targetTable.equals(tableId.name())) { continue; } - sb.append("---\n"); - sb.append("kind: Table\n"); - sb.append("metadata:\n"); - sb.append("\tid: " + tableId + "\n"); + Table table = catalog.loadTable(tableId); - sb.append("data:\n"); - sb.append("\tschema_raw: |-\n" + prefixEachLine(table.schema().toString(), "\t\t") + "\n"); - sb.append( - "\tpartition_spec_raw: |-\n" + prefixEachLine(table.spec().toString(), "\t\t") + "\n"); - sb.append( - "\tsort_order_raw: |-\n" + prefixEachLine(table.sortOrder().toString(), "\t\t") + "\n"); - sb.append("\tproperties: \n"); - for (var property : table.properties().entrySet()) { - var v = property.getValue(); - if (v.contains("\n")) { - sb.append("\t\t" + property.getKey() + ": |-\n" + prefixEachLine(v, "\t\t\t") + "\n"); - } else { - sb.append("\t\t" + property.getKey() + ": \"" + v + "\"\n"); - } - } - sb.append("\tlocation: " + table.location() + "\n"); - sb.append("\tcurrent_snapshot: \n"); + TableMetadata tableMetadata = new TableMetadata(); + + // Set metadata + TableMetadata.Metadata metadata = new TableMetadata.Metadata(); + metadata.setId(tableId.toString()); + tableMetadata.setMetadata(metadata); + + // Set data + TableMetadata.TableData data = new TableMetadata.TableData(); + data.setSchema_raw(table.schema().toString()); + data.setPartition_spec_raw(table.spec().toString()); + data.setSort_order_raw(table.sortOrder().toString()); + data.setProperties(table.properties()); + data.setLocation(table.location()); + + // Set current snapshot Snapshot snapshot = table.currentSnapshot(); if (snapshot != null) { - sb.append("\t\tsequence_number: " + snapshot.sequenceNumber() + "\n"); - sb.append("\t\tid: " + snapshot.snapshotId() + "\n"); - sb.append("\t\tparent_id: " + snapshot.parentId() + "\n"); - sb.append("\t\ttimestamp: " + snapshot.timestampMillis() + "\n"); - sb.append( - "\t\ttimestamp_iso: \"" - + Instant.ofEpochMilli(snapshot.timestampMillis()).toString() - + "\"\n"); - sb.append( - "\t\ttimestamp_iso_local: \"" - + Instant.ofEpochMilli(snapshot.timestampMillis()) - .atZone(ZoneId.systemDefault()) - .format(DateTimeFormatter.ISO_OFFSET_DATE_TIME) - + "\"\n"); - sb.append("\t\toperation: " + snapshot.operation() + "\n"); - sb.append("\t\tsummary:\n"); - for (var property : snapshot.summary().entrySet()) { - sb.append("\t\t\t" + property.getKey() + ": \"" + property.getValue() + "\"\n"); - } - sb.append("\t\tlocation: " + snapshot.manifestListLocation() + "\n"); + TableMetadata.SnapshotInfo snapshotInfo = new TableMetadata.SnapshotInfo(); + snapshotInfo.setSequence_number(snapshot.sequenceNumber()); + snapshotInfo.setId(snapshot.snapshotId()); + snapshotInfo.setParent_id(snapshot.parentId()); + snapshotInfo.setTimestamp(snapshot.timestampMillis()); + snapshotInfo.setTimestamp_iso( + Instant.ofEpochMilli(snapshot.timestampMillis()).toString()); + snapshotInfo.setTimestamp_iso_local( + Instant.ofEpochMilli(snapshot.timestampMillis()) + .atZone(ZoneId.systemDefault()) + .format(DateTimeFormatter.ISO_OFFSET_DATE_TIME)); + snapshotInfo.setOperation(snapshot.operation()); + snapshotInfo.setSummary(snapshot.summary()); + snapshotInfo.setLocation(snapshot.manifestListLocation()); + data.setCurrent_snapshot(snapshotInfo); } if (includeMetrics) { - printTableMetrics(table, sb); + data.setMetrics(getTableMetrics(table)); } + + tableMetadata.setData(data); + tables.add(tableMetadata); } } - String r = sb.toString().replace("\t", " "); + + ObjectMapper mapper; if (json) { - r = convertYamlToJson(r); + mapper = new ObjectMapper(); + } else { + YAMLFactory yamlFactory = + new YAMLFactory() + .disable(YAMLGenerator.Feature.WRITE_DOC_START_MARKER) + .enable(YAMLGenerator.Feature.MINIMIZE_QUOTES) + .enable(YAMLGenerator.Feature.INDENT_ARRAYS); + mapper = new ObjectMapper(yamlFactory); + } + + StringBuilder output = new StringBuilder(); + for (TableMetadata table : tables) { + if (!json) { + output.append("---\n"); + } + output.append(mapper.writeValueAsString(table)); + if (!json) { + output.append("\n"); + } } - System.out.println(r); + System.out.print(output.toString()); } - private static void printTableMetrics(Table table, StringBuilder buffer) throws IOException { + private static List getTableMetrics(Table table) throws IOException { + List metrics = new ArrayList<>(); TableScan scan = table.newScan().includeColumnStats(); - CloseableIterable tasks = scan.planFiles(); - for (FileScanTask task : tasks) { - DataFile dataFile = task.file(); - buffer.append("\tmetrics:\n"); - buffer.append("\t\tfile: " + dataFile.path() + "\n"); - buffer.append("\t\trecord_count: " + dataFile.recordCount() + "\n"); + try (CloseableIterable tasks = scan.planFiles()) { + for (FileScanTask task : tasks) { + DataFile dataFile = task.file(); + TableMetadata.MetricsInfo metricsInfo = new TableMetadata.MetricsInfo(); + metricsInfo.setFile(dataFile.path().toString()); + metricsInfo.setRecord_count(dataFile.recordCount()); + metricsInfo.setColumns(new ArrayList<>()); - Map valueCounts = dataFile.valueCounts(); - Map nullCounts = dataFile.nullValueCounts(); - Map lowerBounds = dataFile.lowerBounds(); - Map upperBounds = dataFile.upperBounds(); + Map valueCounts = dataFile.valueCounts(); + Map nullCounts = dataFile.nullValueCounts(); + Map lowerBounds = dataFile.lowerBounds(); + Map upperBounds = dataFile.upperBounds(); - if (valueCounts == null && nullCounts == null && lowerBounds == null && upperBounds == null) { - continue; - } - - buffer.append("\t\tcolumns:\n"); - for (Types.NestedField field : table.schema().columns()) { - int id = field.fieldId(); - buffer.append("\t\t\t" + field.name() + ":\n"); - if (valueCounts != null) { - buffer.append("\t\t\t\tvalue_count: " + valueCounts.get(id) + "\n"); - } - if (nullCounts != null) { - buffer.append("\t\t\t\tnull_count: " + nullCounts.get(id) + "\n"); - } - if (lowerBounds != null) { - ByteBuffer lower = lowerBounds.get(id); - String lowerStr = - lower != null ? Conversions.fromByteBuffer(field.type(), lower).toString() : "null"; - buffer.append("\t\t\t\tlower_bound: " + lowerStr + "\n"); - } - if (upperBounds != null) { - ByteBuffer upper = upperBounds.get(id); - String upperStr = - upper != null ? Conversions.fromByteBuffer(field.type(), upper).toString() : "null"; - buffer.append("\t\t\t\tupper_bound: " + upperStr + "\n"); + if (valueCounts == null + && nullCounts == null + && lowerBounds == null + && upperBounds == null) { + continue; } - } - } - tasks.close(); - } + for (Types.NestedField field : table.schema().columns()) { + int id = field.fieldId(); + TableMetadata.ColumnMetrics columnMetrics = new TableMetadata.ColumnMetrics(); + columnMetrics.setName(field.name()); + + if (valueCounts != null) { + columnMetrics.setValue_count(valueCounts.get(id)); + } + if (nullCounts != null) { + columnMetrics.setNull_count(nullCounts.get(id)); + } + if (lowerBounds != null) { + ByteBuffer lower = lowerBounds.get(id); + columnMetrics.setLower_bound( + lower != null ? Conversions.fromByteBuffer(field.type(), lower).toString() : null); + } + if (upperBounds != null) { + ByteBuffer upper = upperBounds.get(id); + columnMetrics.setUpper_bound( + upper != null ? Conversions.fromByteBuffer(field.type(), upper).toString() : null); + } - private static String convertYamlToJson(String yaml) throws IOException { - YAMLFactory yamlFactory = new YAMLFactory(); - ObjectMapper yamlReader = new ObjectMapper(yamlFactory); - ObjectMapper jsonWriter = new ObjectMapper(); - StringBuilder result = new StringBuilder(); - try (JsonParser parser = yamlFactory.createParser(yaml)) { - while (!parser.isClosed()) { - JsonNode node = yamlReader.readTree(parser); - if (node != null) { - String json = jsonWriter.writeValueAsString(node); - result.append(json).append("\n"); + metricsInfo.getColumns().add(columnMetrics); } + + metrics.add(metricsInfo); } } - return result.toString().trim(); + + return metrics; } private static String prefixEachLine(String v, String prefix) { From 4dc8ea40f6e8a4e45d409d12a124810b87402d48 Mon Sep 17 00:00:00 2001 From: Kanthi Subramanian Date: Wed, 23 Apr 2025 20:05:35 -0400 Subject: [PATCH 2/7] Add TableMetadata class. --- .../ice/internal/model/TableMetadata.java | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 ice/src/main/java/com/altinity/ice/internal/model/TableMetadata.java diff --git a/ice/src/main/java/com/altinity/ice/internal/model/TableMetadata.java b/ice/src/main/java/com/altinity/ice/internal/model/TableMetadata.java new file mode 100644 index 00000000..0fa9f75c --- /dev/null +++ b/ice/src/main/java/com/altinity/ice/internal/model/TableMetadata.java @@ -0,0 +1,67 @@ +package com.altinity.ice.internal.model; + +import com.fasterxml.jackson.annotation.JsonInclude; +import java.util.List; +import java.util.Map; +import lombok.Data; + +@JsonInclude(JsonInclude.Include.NON_NULL) +@Data +public class TableMetadata { + private String kind = "Table"; + private Metadata metadata; + private TableData data; + + public void setMetadata(Metadata metadata) { + this.metadata = metadata; + } + + public void setData(TableData data) { + this.data = data; + } + + @Data + public static class Metadata { + private String id; + } + + @Data + public static class TableData { + private String schema_raw; + private String partition_spec_raw; + private String sort_order_raw; + private Map properties; + private String location; + private SnapshotInfo current_snapshot; + private List metrics; + } + + @Data + public static class SnapshotInfo { + private long sequence_number; + private long id; + private Long parent_id; + private long timestamp; + private String timestamp_iso; + private String timestamp_iso_local; + private String operation; + private Map summary; + private String location; + } + + @Data + public static class MetricsInfo { + private String file; + private long record_count; + private List columns; + } + + @Data + public static class ColumnMetrics { + private String name; + private Long value_count; + private Long null_count; + private String lower_bound; + private String upper_bound; + } +} From 0560c40b209ba22ddc5cb9c544ee22f2881f2a99 Mon Sep 17 00:00:00 2001 From: Kanthi Subramanian Date: Thu, 24 Apr 2025 17:44:20 -0400 Subject: [PATCH 3/7] ice: Removed lambok depedency and replaced with record. --- ice/pom.xml | 13 --- .../ice/internal/model/TableMetadata.java | 88 +++++++------------ 2 files changed, 31 insertions(+), 70 deletions(-) diff --git a/ice/pom.xml b/ice/pom.xml index 5014aead..37f81bad 100644 --- a/ice/pom.xml +++ b/ice/pom.xml @@ -14,7 +14,6 @@ 0.9.28 - 1.18.30 @@ -388,13 +387,6 @@ 7.9.0 test - - - org.projectlombok - lombok - ${lombok.version} - provided - @@ -412,11 +404,6 @@ picocli-codegen 4.7.6 - - org.projectlombok - lombok - ${lombok.version} - diff --git a/ice/src/main/java/com/altinity/ice/internal/model/TableMetadata.java b/ice/src/main/java/com/altinity/ice/internal/model/TableMetadata.java index 0fa9f75c..1cfcba34 100644 --- a/ice/src/main/java/com/altinity/ice/internal/model/TableMetadata.java +++ b/ice/src/main/java/com/altinity/ice/internal/model/TableMetadata.java @@ -3,65 +3,39 @@ import com.fasterxml.jackson.annotation.JsonInclude; import java.util.List; import java.util.Map; -import lombok.Data; @JsonInclude(JsonInclude.Include.NON_NULL) -@Data -public class TableMetadata { - private String kind = "Table"; - private Metadata metadata; - private TableData data; - - public void setMetadata(Metadata metadata) { - this.metadata = metadata; - } - - public void setData(TableData data) { - this.data = data; - } - - @Data - public static class Metadata { - private String id; - } - - @Data - public static class TableData { - private String schema_raw; - private String partition_spec_raw; - private String sort_order_raw; - private Map properties; - private String location; - private SnapshotInfo current_snapshot; - private List metrics; - } - - @Data - public static class SnapshotInfo { - private long sequence_number; - private long id; - private Long parent_id; - private long timestamp; - private String timestamp_iso; - private String timestamp_iso_local; - private String operation; - private Map summary; - private String location; +public record TableMetadata(String kind, Metadata metadata, TableData data) { + public TableMetadata { + if (kind == null) { + kind = "Table"; + } } - @Data - public static class MetricsInfo { - private String file; - private long record_count; - private List columns; - } - - @Data - public static class ColumnMetrics { - private String name; - private Long value_count; - private Long null_count; - private String lower_bound; - private String upper_bound; - } + public record Metadata(String id) {} + + public record TableData( + String schema_raw, + String partition_spec_raw, + String sort_order_raw, + Map properties, + String location, + SnapshotInfo current_snapshot, + List metrics) {} + + public record SnapshotInfo( + long sequence_number, + long id, + Long parent_id, + long timestamp, + String timestamp_iso, + String timestamp_iso_local, + String operation, + Map summary, + String location) {} + + public record MetricsInfo(String file, long record_count, List columns) {} + + public record ColumnMetrics( + String name, Long value_count, Long null_count, String lower_bound, String upper_bound) {} } From d222fef97d792cc941a0a754eda52197fd8e3b18 Mon Sep 17 00:00:00 2001 From: Kanthi Subramanian Date: Thu, 24 Apr 2025 22:26:45 -0400 Subject: [PATCH 4/7] ice: reformatting of Tablemetadata. --- .../altinity/ice/internal/cmd/Describe.java | 106 ++++++++++-------- 1 file changed, 59 insertions(+), 47 deletions(-) diff --git a/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java b/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java index 8b014267..53b7549d 100644 --- a/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java +++ b/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java @@ -56,46 +56,61 @@ public static void run(RESTCatalog catalog, String target, boolean json, boolean } Table table = catalog.loadTable(tableId); - TableMetadata tableMetadata = new TableMetadata(); - - // Set metadata - TableMetadata.Metadata metadata = new TableMetadata.Metadata(); - metadata.setId(tableId.toString()); - tableMetadata.setMetadata(metadata); + TableMetadata tableMetadata = + new TableMetadata("Table", new TableMetadata.Metadata(tableId.toString()), null); // Set data - TableMetadata.TableData data = new TableMetadata.TableData(); - data.setSchema_raw(table.schema().toString()); - data.setPartition_spec_raw(table.spec().toString()); - data.setSort_order_raw(table.sortOrder().toString()); - data.setProperties(table.properties()); - data.setLocation(table.location()); + TableMetadata.TableData data = + new TableMetadata.TableData( + table.schema().toString(), + table.spec().toString(), + table.sortOrder().toString(), + table.properties(), + table.location(), + null, + null); // Set current snapshot Snapshot snapshot = table.currentSnapshot(); if (snapshot != null) { - TableMetadata.SnapshotInfo snapshotInfo = new TableMetadata.SnapshotInfo(); - snapshotInfo.setSequence_number(snapshot.sequenceNumber()); - snapshotInfo.setId(snapshot.snapshotId()); - snapshotInfo.setParent_id(snapshot.parentId()); - snapshotInfo.setTimestamp(snapshot.timestampMillis()); - snapshotInfo.setTimestamp_iso( - Instant.ofEpochMilli(snapshot.timestampMillis()).toString()); - snapshotInfo.setTimestamp_iso_local( - Instant.ofEpochMilli(snapshot.timestampMillis()) - .atZone(ZoneId.systemDefault()) - .format(DateTimeFormatter.ISO_OFFSET_DATE_TIME)); - snapshotInfo.setOperation(snapshot.operation()); - snapshotInfo.setSummary(snapshot.summary()); - snapshotInfo.setLocation(snapshot.manifestListLocation()); - data.setCurrent_snapshot(snapshotInfo); + TableMetadata.SnapshotInfo snapshotInfo = + new TableMetadata.SnapshotInfo( + snapshot.sequenceNumber(), + snapshot.snapshotId(), + snapshot.parentId(), + snapshot.timestampMillis(), + Instant.ofEpochMilli(snapshot.timestampMillis()).toString(), + Instant.ofEpochMilli(snapshot.timestampMillis()) + .atZone(ZoneId.systemDefault()) + .format(DateTimeFormatter.ISO_OFFSET_DATE_TIME), + snapshot.operation(), + snapshot.summary(), + snapshot.manifestListLocation()); + data = + new TableMetadata.TableData( + data.schema_raw(), + data.partition_spec_raw(), + data.sort_order_raw(), + data.properties(), + data.location(), + snapshotInfo, + data.metrics()); } if (includeMetrics) { - data.setMetrics(getTableMetrics(table)); + List metrics = getTableMetrics(table); + data = + new TableMetadata.TableData( + data.schema_raw(), + data.partition_spec_raw(), + data.sort_order_raw(), + data.properties(), + data.location(), + data.current_snapshot(), + metrics); } - tableMetadata.setData(data); + tableMetadata = new TableMetadata(tableMetadata.kind(), tableMetadata.metadata(), data); tables.add(tableMetadata); } } @@ -132,10 +147,7 @@ private static List getTableMetrics(Table table) thro try (CloseableIterable tasks = scan.planFiles()) { for (FileScanTask task : tasks) { DataFile dataFile = task.file(); - TableMetadata.MetricsInfo metricsInfo = new TableMetadata.MetricsInfo(); - metricsInfo.setFile(dataFile.path().toString()); - metricsInfo.setRecord_count(dataFile.recordCount()); - metricsInfo.setColumns(new ArrayList<>()); + List columns = new ArrayList<>(); Map valueCounts = dataFile.valueCounts(); Map nullCounts = dataFile.nullValueCounts(); @@ -151,30 +163,30 @@ private static List getTableMetrics(Table table) thro for (Types.NestedField field : table.schema().columns()) { int id = field.fieldId(); - TableMetadata.ColumnMetrics columnMetrics = new TableMetadata.ColumnMetrics(); - columnMetrics.setName(field.name()); + String name = field.name(); + Long valueCount = valueCounts != null ? valueCounts.get(id) : null; + Long nullCount = nullCounts != null ? nullCounts.get(id) : null; + String lowerBound = null; + String upperBound = null; - if (valueCounts != null) { - columnMetrics.setValue_count(valueCounts.get(id)); - } - if (nullCounts != null) { - columnMetrics.setNull_count(nullCounts.get(id)); - } if (lowerBounds != null) { ByteBuffer lower = lowerBounds.get(id); - columnMetrics.setLower_bound( - lower != null ? Conversions.fromByteBuffer(field.type(), lower).toString() : null); + lowerBound = + lower != null ? Conversions.fromByteBuffer(field.type(), lower).toString() : null; } if (upperBounds != null) { ByteBuffer upper = upperBounds.get(id); - columnMetrics.setUpper_bound( - upper != null ? Conversions.fromByteBuffer(field.type(), upper).toString() : null); + upperBound = + upper != null ? Conversions.fromByteBuffer(field.type(), upper).toString() : null; } - metricsInfo.getColumns().add(columnMetrics); + columns.add( + new TableMetadata.ColumnMetrics(name, valueCount, nullCount, lowerBound, upperBound)); } - metrics.add(metricsInfo); + metrics.add( + new TableMetadata.MetricsInfo( + dataFile.path().toString(), dataFile.recordCount(), columns)); } } From 79634532645566ff254f9cd6f4cc44e5e1eb97f0 Mon Sep 17 00:00:00 2001 From: Kanthi Subramanian Date: Fri, 25 Apr 2025 16:10:48 -0400 Subject: [PATCH 5/7] ice: reformatting of Tablemetadata. --- .../altinity/ice/internal/cmd/Describe.java | 227 ++++++++---------- 1 file changed, 105 insertions(+), 122 deletions(-) diff --git a/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java b/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java index 53b7549d..7f390cb7 100644 --- a/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java +++ b/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java @@ -1,15 +1,14 @@ package com.altinity.ice.internal.cmd; -import com.altinity.ice.internal.model.TableMetadata; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; -import com.fasterxml.jackson.dataformat.yaml.YAMLGenerator; import java.io.IOException; import java.nio.ByteBuffer; import java.time.Instant; import java.time.ZoneId; import java.time.format.DateTimeFormatter; -import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -22,13 +21,16 @@ import org.apache.iceberg.types.Types; public final class Describe { + private Describe() {} + // TODO: refactor: the use of StringBuilder below is absolutely criminal public static void run(RESTCatalog catalog, String target, boolean json, boolean includeMetrics) throws IOException { String targetNamespace = null; String targetTable = null; if (target != null && !target.isEmpty()) { + // TODO: support catalog.ns.table var s = target.split("[.]", 2); switch (s.length) { case 2: @@ -40,157 +42,138 @@ public static void run(RESTCatalog catalog, String target, boolean json, boolean break; } } - - List tables = new ArrayList<>(); + // FIXME: there is no need to list nss/tables when target is given + var sb = new StringBuilder(); List namespaces = catalog.listNamespaces(); - for (Namespace namespace : namespaces) { if (targetNamespace != null && !targetNamespace.equals(namespace.toString())) { continue; } - - List tableIds = catalog.listTables(namespace); - for (TableIdentifier tableId : tableIds) { + List tables = catalog.listTables(namespace); + for (TableIdentifier tableId : tables) { if (targetTable != null && !targetTable.equals(tableId.name())) { continue; } - + sb.append("---\n"); + sb.append("kind: Table\n"); + sb.append("metadata:\n"); + sb.append("\tid: " + tableId + "\n"); Table table = catalog.loadTable(tableId); - TableMetadata tableMetadata = - new TableMetadata("Table", new TableMetadata.Metadata(tableId.toString()), null); - - // Set data - TableMetadata.TableData data = - new TableMetadata.TableData( - table.schema().toString(), - table.spec().toString(), - table.sortOrder().toString(), - table.properties(), - table.location(), - null, - null); - - // Set current snapshot + sb.append("data:\n"); + sb.append("\tschema_raw: |-\n" + prefixEachLine(table.schema().toString(), "\t\t") + "\n"); + sb.append( + "\tpartition_spec_raw: |-\n" + prefixEachLine(table.spec().toString(), "\t\t") + "\n"); + sb.append( + "\tsort_order_raw: |-\n" + prefixEachLine(table.sortOrder().toString(), "\t\t") + "\n"); + sb.append("\tproperties: \n"); + for (var property : table.properties().entrySet()) { + var v = property.getValue(); + if (v.contains("\n")) { + sb.append("\t\t" + property.getKey() + ": |-\n" + prefixEachLine(v, "\t\t\t") + "\n"); + } else { + sb.append("\t\t" + property.getKey() + ": \"" + v + "\"\n"); + } + } + sb.append("\tlocation: " + table.location() + "\n"); + sb.append("\tcurrent_snapshot: \n"); Snapshot snapshot = table.currentSnapshot(); if (snapshot != null) { - TableMetadata.SnapshotInfo snapshotInfo = - new TableMetadata.SnapshotInfo( - snapshot.sequenceNumber(), - snapshot.snapshotId(), - snapshot.parentId(), - snapshot.timestampMillis(), - Instant.ofEpochMilli(snapshot.timestampMillis()).toString(), - Instant.ofEpochMilli(snapshot.timestampMillis()) + sb.append("\t\tsequence_number: " + snapshot.sequenceNumber() + "\n"); + sb.append("\t\tid: " + snapshot.snapshotId() + "\n"); + sb.append("\t\tparent_id: " + snapshot.parentId() + "\n"); + sb.append("\t\ttimestamp: " + snapshot.timestampMillis() + "\n"); + sb.append( + "\t\ttimestamp_iso: \"" + + Instant.ofEpochMilli(snapshot.timestampMillis()).toString() + + "\"\n"); + sb.append( + "\t\ttimestamp_iso_local: \"" + + Instant.ofEpochMilli(snapshot.timestampMillis()) .atZone(ZoneId.systemDefault()) - .format(DateTimeFormatter.ISO_OFFSET_DATE_TIME), - snapshot.operation(), - snapshot.summary(), - snapshot.manifestListLocation()); - data = - new TableMetadata.TableData( - data.schema_raw(), - data.partition_spec_raw(), - data.sort_order_raw(), - data.properties(), - data.location(), - snapshotInfo, - data.metrics()); + .format(DateTimeFormatter.ISO_OFFSET_DATE_TIME) + + "\"\n"); + sb.append("\t\toperation: " + snapshot.operation() + "\n"); + sb.append("\t\tsummary:\n"); + for (var property : snapshot.summary().entrySet()) { + sb.append("\t\t\t" + property.getKey() + ": \"" + property.getValue() + "\"\n"); + } + sb.append("\t\tlocation: " + snapshot.manifestListLocation() + "\n"); } if (includeMetrics) { - List metrics = getTableMetrics(table); - data = - new TableMetadata.TableData( - data.schema_raw(), - data.partition_spec_raw(), - data.sort_order_raw(), - data.properties(), - data.location(), - data.current_snapshot(), - metrics); + printTableMetrics(table, sb); } - - tableMetadata = new TableMetadata(tableMetadata.kind(), tableMetadata.metadata(), data); - tables.add(tableMetadata); } } - - ObjectMapper mapper; + String r = sb.toString().replace("\t", " "); if (json) { - mapper = new ObjectMapper(); - } else { - YAMLFactory yamlFactory = - new YAMLFactory() - .disable(YAMLGenerator.Feature.WRITE_DOC_START_MARKER) - .enable(YAMLGenerator.Feature.MINIMIZE_QUOTES) - .enable(YAMLGenerator.Feature.INDENT_ARRAYS); - mapper = new ObjectMapper(yamlFactory); - } - - StringBuilder output = new StringBuilder(); - for (TableMetadata table : tables) { - if (!json) { - output.append("---\n"); - } - output.append(mapper.writeValueAsString(table)); - if (!json) { - output.append("\n"); - } + r = convertYamlToJson(r); } - System.out.print(output.toString()); + System.out.println(r); } - private static List getTableMetrics(Table table) throws IOException { - List metrics = new ArrayList<>(); + private static void printTableMetrics(Table table, StringBuilder buffer) throws IOException { TableScan scan = table.newScan().includeColumnStats(); + CloseableIterable tasks = scan.planFiles(); - try (CloseableIterable tasks = scan.planFiles()) { - for (FileScanTask task : tasks) { - DataFile dataFile = task.file(); - List columns = new ArrayList<>(); + for (FileScanTask task : tasks) { + DataFile dataFile = task.file(); + buffer.append("\tmetrics:\n"); + buffer.append("\t\tfile: " + dataFile.path() + "\n"); + buffer.append("\t\trecord_count: " + dataFile.recordCount() + "\n"); - Map valueCounts = dataFile.valueCounts(); - Map nullCounts = dataFile.nullValueCounts(); - Map lowerBounds = dataFile.lowerBounds(); - Map upperBounds = dataFile.upperBounds(); + Map valueCounts = dataFile.valueCounts(); + Map nullCounts = dataFile.nullValueCounts(); + Map lowerBounds = dataFile.lowerBounds(); + Map upperBounds = dataFile.upperBounds(); - if (valueCounts == null - && nullCounts == null - && lowerBounds == null - && upperBounds == null) { - continue; - } + if (valueCounts == null && nullCounts == null && lowerBounds == null && upperBounds == null) { + continue; + } - for (Types.NestedField field : table.schema().columns()) { - int id = field.fieldId(); - String name = field.name(); - Long valueCount = valueCounts != null ? valueCounts.get(id) : null; - Long nullCount = nullCounts != null ? nullCounts.get(id) : null; - String lowerBound = null; - String upperBound = null; + buffer.append("\t\tcolumns:\n"); + for (Types.NestedField field : table.schema().columns()) { + int id = field.fieldId(); + buffer.append("\t\t\t" + field.name() + ":\n"); + if (valueCounts != null) { + buffer.append("\t\t\t\tvalue_count: " + valueCounts.get(id) + "\n"); + } + if (nullCounts != null) { + buffer.append("\t\t\t\tnull_count: " + nullCounts.get(id) + "\n"); + } + if (lowerBounds != null) { + ByteBuffer lower = lowerBounds.get(id); + String lowerStr = + lower != null ? Conversions.fromByteBuffer(field.type(), lower).toString() : "null"; + buffer.append("\t\t\t\tlower_bound: " + lowerStr + "\n"); + } + if (upperBounds != null) { + ByteBuffer upper = upperBounds.get(id); + String upperStr = + upper != null ? Conversions.fromByteBuffer(field.type(), upper).toString() : "null"; + buffer.append("\t\t\t\tupper_bound: " + upperStr + "\n"); + } + } + } - if (lowerBounds != null) { - ByteBuffer lower = lowerBounds.get(id); - lowerBound = - lower != null ? Conversions.fromByteBuffer(field.type(), lower).toString() : null; - } - if (upperBounds != null) { - ByteBuffer upper = upperBounds.get(id); - upperBound = - upper != null ? Conversions.fromByteBuffer(field.type(), upper).toString() : null; - } + tasks.close(); + } - columns.add( - new TableMetadata.ColumnMetrics(name, valueCount, nullCount, lowerBound, upperBound)); + private static String convertYamlToJson(String yaml) throws IOException { + YAMLFactory yamlFactory = new YAMLFactory(); + ObjectMapper yamlReader = new ObjectMapper(yamlFactory); + ObjectMapper jsonWriter = new ObjectMapper(); + StringBuilder result = new StringBuilder(); + try (JsonParser parser = yamlFactory.createParser(yaml)) { + while (!parser.isClosed()) { + JsonNode node = yamlReader.readTree(parser); + if (node != null) { + String json = jsonWriter.writeValueAsString(node); + result.append(json).append("\n"); } - - metrics.add( - new TableMetadata.MetricsInfo( - dataFile.path().toString(), dataFile.recordCount(), columns)); } } - - return metrics; + return result.toString().trim(); } private static String prefixEachLine(String v, String prefix) { From ca59d54392083758b8544d39343a9319c8c9cf9f Mon Sep 17 00:00:00 2001 From: Kanthi Subramanian Date: Fri, 25 Apr 2025 16:13:03 -0400 Subject: [PATCH 6/7] ice: Removed option to include metrics. --- ice/src/main/java/com/altinity/ice/Main.java | 8 ++------ .../main/java/com/altinity/ice/internal/cmd/Describe.java | 7 ++----- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/ice/src/main/java/com/altinity/ice/Main.java b/ice/src/main/java/com/altinity/ice/Main.java index a26ed39f..a45eb5a8 100644 --- a/ice/src/main/java/com/altinity/ice/Main.java +++ b/ice/src/main/java/com/altinity/ice/Main.java @@ -69,14 +69,10 @@ void describe( @CommandLine.Option( names = {"--json"}, description = "Output JSON instead of YAML") - boolean json, - @CommandLine.Option( - names = {"--include-metrics"}, - description = "Include table metrics in the output") - boolean includeMetrics) + boolean json) throws IOException { try (RESTCatalog catalog = loadCatalog(this.configFile)) { - Describe.run(catalog, target, json, includeMetrics); + Describe.run(catalog, target, json); } } diff --git a/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java b/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java index 7f390cb7..e29577f9 100644 --- a/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java +++ b/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java @@ -25,8 +25,7 @@ public final class Describe { private Describe() {} // TODO: refactor: the use of StringBuilder below is absolutely criminal - public static void run(RESTCatalog catalog, String target, boolean json, boolean includeMetrics) - throws IOException { + public static void run(RESTCatalog catalog, String target, boolean json) throws IOException { String targetNamespace = null; String targetTable = null; if (target != null && !target.isEmpty()) { @@ -100,9 +99,7 @@ public static void run(RESTCatalog catalog, String target, boolean json, boolean sb.append("\t\tlocation: " + snapshot.manifestListLocation() + "\n"); } - if (includeMetrics) { - printTableMetrics(table, sb); - } + printTableMetrics(table, sb); } } String r = sb.toString().replace("\t", " "); From 3743ed8a28d3b0ce0ace8d2dd7fb90fec231d533 Mon Sep 17 00:00:00 2001 From: Kanthi Subramanian Date: Mon, 28 Apr 2025 16:46:57 -0400 Subject: [PATCH 7/7] ice: Reverted back change to use Tablemetadata instead of stringBuilder. --- .../altinity/ice/internal/cmd/Describe.java | 150 +++++++----------- 1 file changed, 58 insertions(+), 92 deletions(-) diff --git a/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java b/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java index e29577f9..26bb853b 100644 --- a/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java +++ b/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java @@ -1,7 +1,7 @@ package com.altinity.ice.internal.cmd; -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.databind.JsonNode; +import com.altinity.ice.internal.model.TableMetadata; +import com.altinity.ice.internal.model.TableMetadata.*; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import java.io.IOException; @@ -9,9 +9,9 @@ import java.time.Instant; import java.time.ZoneId; import java.time.format.DateTimeFormatter; +import java.util.ArrayList; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; import org.apache.iceberg.*; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; @@ -24,12 +24,10 @@ public final class Describe { private Describe() {} - // TODO: refactor: the use of StringBuilder below is absolutely criminal public static void run(RESTCatalog catalog, String target, boolean json) throws IOException { String targetNamespace = null; String targetTable = null; if (target != null && !target.isEmpty()) { - // TODO: support catalog.ns.table var s = target.split("[.]", 2); switch (s.length) { case 2: @@ -41,8 +39,8 @@ public static void run(RESTCatalog catalog, String target, boolean json) throws break; } } - // FIXME: there is no need to list nss/tables when target is given - var sb = new StringBuilder(); + + List tablesMetadata = new ArrayList<>(); List namespaces = catalog.listNamespaces(); for (Namespace namespace : namespaces) { if (targetNamespace != null && !targetNamespace.equals(namespace.toString())) { @@ -53,71 +51,55 @@ public static void run(RESTCatalog catalog, String target, boolean json) throws if (targetTable != null && !targetTable.equals(tableId.name())) { continue; } - sb.append("---\n"); - sb.append("kind: Table\n"); - sb.append("metadata:\n"); - sb.append("\tid: " + tableId + "\n"); Table table = catalog.loadTable(tableId); - sb.append("data:\n"); - sb.append("\tschema_raw: |-\n" + prefixEachLine(table.schema().toString(), "\t\t") + "\n"); - sb.append( - "\tpartition_spec_raw: |-\n" + prefixEachLine(table.spec().toString(), "\t\t") + "\n"); - sb.append( - "\tsort_order_raw: |-\n" + prefixEachLine(table.sortOrder().toString(), "\t\t") + "\n"); - sb.append("\tproperties: \n"); - for (var property : table.properties().entrySet()) { - var v = property.getValue(); - if (v.contains("\n")) { - sb.append("\t\t" + property.getKey() + ": |-\n" + prefixEachLine(v, "\t\t\t") + "\n"); - } else { - sb.append("\t\t" + property.getKey() + ": \"" + v + "\"\n"); - } - } - sb.append("\tlocation: " + table.location() + "\n"); - sb.append("\tcurrent_snapshot: \n"); Snapshot snapshot = table.currentSnapshot(); + + SnapshotInfo snapshotInfo = null; if (snapshot != null) { - sb.append("\t\tsequence_number: " + snapshot.sequenceNumber() + "\n"); - sb.append("\t\tid: " + snapshot.snapshotId() + "\n"); - sb.append("\t\tparent_id: " + snapshot.parentId() + "\n"); - sb.append("\t\ttimestamp: " + snapshot.timestampMillis() + "\n"); - sb.append( - "\t\ttimestamp_iso: \"" - + Instant.ofEpochMilli(snapshot.timestampMillis()).toString() - + "\"\n"); - sb.append( - "\t\ttimestamp_iso_local: \"" - + Instant.ofEpochMilli(snapshot.timestampMillis()) + snapshotInfo = + new SnapshotInfo( + snapshot.sequenceNumber(), + snapshot.snapshotId(), + snapshot.parentId(), + snapshot.timestampMillis(), + Instant.ofEpochMilli(snapshot.timestampMillis()).toString(), + Instant.ofEpochMilli(snapshot.timestampMillis()) .atZone(ZoneId.systemDefault()) - .format(DateTimeFormatter.ISO_OFFSET_DATE_TIME) - + "\"\n"); - sb.append("\t\toperation: " + snapshot.operation() + "\n"); - sb.append("\t\tsummary:\n"); - for (var property : snapshot.summary().entrySet()) { - sb.append("\t\t\t" + property.getKey() + ": \"" + property.getValue() + "\"\n"); - } - sb.append("\t\tlocation: " + snapshot.manifestListLocation() + "\n"); + .format(DateTimeFormatter.ISO_OFFSET_DATE_TIME), + snapshot.operation(), + snapshot.summary(), + snapshot.manifestListLocation()); } - printTableMetrics(table, sb); + List metrics = getTableMetrics(table); + + TableData tableData = + new TableData( + table.schema().toString(), + table.spec().toString(), + table.sortOrder().toString(), + table.properties(), + table.location(), + snapshotInfo, + metrics); + + tablesMetadata.add(new TableMetadata("Table", new Metadata(tableId.toString()), tableData)); } } - String r = sb.toString().replace("\t", " "); - if (json) { - r = convertYamlToJson(r); - } - System.out.println(r); + + ObjectMapper mapper = json ? new ObjectMapper() : new ObjectMapper(new YAMLFactory()); + String output = mapper.writeValueAsString(tablesMetadata); + System.out.println(output); } - private static void printTableMetrics(Table table, StringBuilder buffer) throws IOException { + private static List getTableMetrics(Table table) throws IOException { + List metricsList = new ArrayList<>(); TableScan scan = table.newScan().includeColumnStats(); CloseableIterable tasks = scan.planFiles(); for (FileScanTask task : tasks) { DataFile dataFile = task.file(); - buffer.append("\tmetrics:\n"); - buffer.append("\t\tfile: " + dataFile.path() + "\n"); - buffer.append("\t\trecord_count: " + dataFile.recordCount() + "\n"); + List columnMetrics = new ArrayList<>(); Map valueCounts = dataFile.valueCounts(); Map nullCounts = dataFile.nullValueCounts(); @@ -128,52 +110,36 @@ private static void printTableMetrics(Table table, StringBuilder buffer) throws continue; } - buffer.append("\t\tcolumns:\n"); for (Types.NestedField field : table.schema().columns()) { int id = field.fieldId(); - buffer.append("\t\t\t" + field.name() + ":\n"); - if (valueCounts != null) { - buffer.append("\t\t\t\tvalue_count: " + valueCounts.get(id) + "\n"); - } - if (nullCounts != null) { - buffer.append("\t\t\t\tnull_count: " + nullCounts.get(id) + "\n"); - } + String lowerBound = null; + String upperBound = null; + if (lowerBounds != null) { ByteBuffer lower = lowerBounds.get(id); - String lowerStr = - lower != null ? Conversions.fromByteBuffer(field.type(), lower).toString() : "null"; - buffer.append("\t\t\t\tlower_bound: " + lowerStr + "\n"); + lowerBound = + lower != null ? Conversions.fromByteBuffer(field.type(), lower).toString() : null; } if (upperBounds != null) { ByteBuffer upper = upperBounds.get(id); - String upperStr = - upper != null ? Conversions.fromByteBuffer(field.type(), upper).toString() : "null"; - buffer.append("\t\t\t\tupper_bound: " + upperStr + "\n"); + upperBound = + upper != null ? Conversions.fromByteBuffer(field.type(), upper).toString() : null; } - } - } - - tasks.close(); - } - private static String convertYamlToJson(String yaml) throws IOException { - YAMLFactory yamlFactory = new YAMLFactory(); - ObjectMapper yamlReader = new ObjectMapper(yamlFactory); - ObjectMapper jsonWriter = new ObjectMapper(); - StringBuilder result = new StringBuilder(); - try (JsonParser parser = yamlFactory.createParser(yaml)) { - while (!parser.isClosed()) { - JsonNode node = yamlReader.readTree(parser); - if (node != null) { - String json = jsonWriter.writeValueAsString(node); - result.append(json).append("\n"); - } + columnMetrics.add( + new ColumnMetrics( + field.name(), + valueCounts != null ? valueCounts.get(id) : null, + nullCounts != null ? nullCounts.get(id) : null, + lowerBound, + upperBound)); } + + metricsList.add( + new MetricsInfo(dataFile.path().toString(), dataFile.recordCount(), columnMetrics)); } - return result.toString().trim(); - } - private static String prefixEachLine(String v, String prefix) { - return v.lines().map(line -> prefix + line).collect(Collectors.joining("\n")); + tasks.close(); + return metricsList; } }