Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions examples/scratch/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@ local-mc mb --ignore-existing local/bucket1

# start Iceberg REST Catalog server
ice-rest-catalog
Alternatively, run the jar file directly:
/ice/examples/scratch$ java -jar ../../ice-rest-catalog/target/ice-rest-catalog-jar-with-dependencies.jar &
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this section (of README) is already followed by

TIP: replace ice & ice-rest-catalog above with local-ice & local-ice-rest-catalog respectively to use code in the repo instead of ice & ice-rest-catalog binaries from the PATH.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TIP: if you execute link-local, ice & ice-rest-catalog will point to local-ice & local-ice-rest-catalog when inside the repo. Just make sure you have direnv installed.



# insert data into catalog
ice insert flowers.iris -p \
file://iris.parquet
ice
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks unintentional


ice insert nyc.taxis -p \
https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2025-01.parquet
Expand Down
8 changes: 6 additions & 2 deletions ice/src/main/java/com/altinity/ice/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,14 @@ void describe(
@CommandLine.Option(
names = {"--json"},
description = "Output JSON instead of YAML")
boolean json)
boolean json,
@CommandLine.Option(
names = {"--include-metrics"},
description = "Include table metrics in the output")
boolean includeMetrics)
throws IOException {
try (RESTCatalog catalog = loadCatalog(this.configFile)) {
Describe.run(catalog, target, json);
Describe.run(catalog, target, json, includeMetrics);
}
}

Expand Down
58 changes: 55 additions & 3 deletions ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,28 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.Table;
import org.apache.iceberg.*;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.rest.RESTCatalog;
import org.apache.iceberg.types.Conversions;
import org.apache.iceberg.types.Types;

public final class Describe {

private Describe() {}

// TODO: refactor: the use of StringBuilder below is absolutely criminal
public static void run(RESTCatalog catalog, String target, boolean json) throws IOException {
public static void run(RESTCatalog catalog, String target, boolean json, boolean includeMetrics)
throws IOException {
String targetNamespace = null;
String targetTable = null;
if (target != null && !target.isEmpty()) {
Expand Down Expand Up @@ -102,6 +107,10 @@ public static void run(RESTCatalog catalog, String target, boolean json) throws
}
sb.append("\t\t\t\tlocation: " + snapshot.manifestListLocation() + "\n");
}

if (includeMetrics) {
printTableMetrics(table, sb);
}
}
if (!tableMatched) {
sb.append(" []\n");
Expand All @@ -117,6 +126,49 @@ public static void run(RESTCatalog catalog, String target, boolean json) throws
System.out.println(r);
}

/**
* Print table metrics
*
* @param table
*/
private static void printTableMetrics(Table table, StringBuilder buffer) throws IOException {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: this javadoc conveys no extra info beyond method name. I'd just drop it

TableScan scan = table.newScan().includeColumnStats();
CloseableIterable<FileScanTask> tasks = scan.planFiles();

for (FileScanTask task : tasks) {
DataFile dataFile = task.file();
buffer.append("\t\t\tmetrics:\n");
buffer.append("\t\t\t file: " + dataFile.path() + "\n");
buffer.append("\t\t\t record_count: " + dataFile.recordCount() + "\n");

Map<Integer, Long> valueCounts = dataFile.valueCounts();
Map<Integer, Long> nullCounts = dataFile.nullValueCounts();
Map<Integer, ByteBuffer> lowerBounds = dataFile.lowerBounds();
Map<Integer, ByteBuffer> upperBounds = dataFile.upperBounds();

buffer.append("\t\t\t columns:\n");
for (Types.NestedField field : table.schema().columns()) {
int id = field.fieldId();
buffer.append("\t\t\t " + field.name() + ":\n");
buffer.append("\t\t\t value_count: " + valueCounts.get(id) + "\n");
buffer.append("\t\t\t null_count: " + nullCounts.get(id) + "\n");

ByteBuffer lower = lowerBounds.get(id);
ByteBuffer upper = upperBounds.get(id);

String lowerStr =
lower != null ? Conversions.fromByteBuffer(field.type(), lower).toString() : "null";
String upperStr =
upper != null ? Conversions.fromByteBuffer(field.type(), upper).toString() : "null";

buffer.append("\t\t\t lower_bound: " + lowerStr + "\n");
buffer.append("\t\t\t upper_bound: " + upperStr + "\n");
}
}

tasks.close();
}

private static String convertYamlToJson(String yaml) throws IOException {
ObjectMapper yamlReader = new ObjectMapper(new YAMLFactory());
Object obj = yamlReader.readValue(yaml, Object.class);
Expand Down
14 changes: 6 additions & 8 deletions ice/src/main/java/com/altinity/ice/internal/cmd/Insert.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,7 @@
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import org.apache.iceberg.AppendFiles;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.*;
import org.apache.iceberg.aws.s3.S3FileIO;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.data.Record;
Expand All @@ -39,6 +34,7 @@
import org.apache.iceberg.mapping.NameMappingParser;
import org.apache.iceberg.parquet.Parquet;
import org.apache.iceberg.parquet.ParquetSchemaUtil;
import org.apache.iceberg.parquet.ParquetUtil;
import org.apache.iceberg.rest.RESTCatalog;
import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.types.Types;
Expand Down Expand Up @@ -137,7 +133,7 @@ public static void run(
RetryLog retryLog =
retryListFile != null && !retryListFile.isEmpty()
? new RetryLog(retryListFile)
: null; ) {
: null) {
boolean atLeastOneFileAppended = false;

// TODO: parallel
Expand Down Expand Up @@ -235,13 +231,15 @@ public static void run(
logger.info("{}: adding data file", file);
long recordCount =
metadata.getBlocks().stream().mapToLong(BlockMetaData::getRowCount).sum();
MetricsConfig metricsConfig = MetricsConfig.forTable(table);
Metrics metrics = ParquetUtil.fileMetrics(inputFile, metricsConfig);
df =
new DataFiles.Builder(table.spec())
.withPath(dataFile)
.withFormat("PARQUET")
.withRecordCount(recordCount)
.withFileSizeInBytes(dataFileSizeInBytes)
// TODO: metrics
.withMetrics(metrics)
.build();
} catch (Exception e) { // FIXME
if (retryLog != null) {
Expand Down