Skip to content

Commit 7d6f10e

Browse files
authored
Merge pull request #1448 from lesserwhirls/szarr
Zarr improvements
2 parents 4927201 + 829eded commit 7d6f10e

25 files changed

Lines changed: 457 additions & 32 deletions

File tree

cdm/core/src/main/java/ucar/nc2/constants/CDM.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 1998-2018 John Caron and University Corporation for Atmospheric Research/Unidata
2+
* Copyright (c) 1998-2025 John Caron and University Corporation for Atmospheric Research/Unidata
33
* See LICENSE for license information.
44
*/
55

@@ -66,6 +66,7 @@ public class CDM {
6666
public static final String NCPROPERTIES = "_NCProperties";
6767
public static final String ISNETCDF4 = "_IsNetcdf4";
6868
public static final String SUPERBLOCKVERSION = "_SuperblockVersion";
69+
public static final String ARRAYDIMENSIONS = "_ARRAY_DIMENSIONS";
6970

7071
public static final String[] SPECIALS = {NCPROPERTIES, ISNETCDF4, SUPERBLOCKVERSION};
7172

cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrHeader.java

Lines changed: 47 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
/*
2-
* Copyright (c) 2021 University Corporation for Atmospheric Research/Unidata
2+
* Copyright (c) 2021-2025 University Corporation for Atmospheric Research/Unidata
33
* See LICENSE for license information.
44
*/
55

66
package ucar.nc2.iosp.zarr;
77

8+
import static ucar.nc2.constants.CDM.ARRAYDIMENSIONS;
9+
810
import com.fasterxml.jackson.databind.ObjectMapper;
911

1012
import ucar.ma2.ArrayObject;
@@ -34,6 +36,7 @@ public class ZarrHeader {
3436
private final RandomAccessDirectory rootRaf;
3537
private final Group.Builder rootGroup;
3638
private final String rootLocation;
39+
3740
private static final ObjectMapper objectMapper = new ObjectMapper();
3841

3942
public ZarrHeader(RandomAccessDirectory raf, Group.Builder rootGroup) {
@@ -219,20 +222,25 @@ private void makeVariable(RandomAccessDirectoryItem item, long dataOffset, ZArra
219222

220223
for (Attribute attr : attrs) {
221224
final String attrName = attr.getName();
222-
if ("_ARRAY_DIMENSIONS".equals(attrName)) {
225+
if (ARRAYDIMENSIONS.equals(attrName)) {
223226
try {
224-
final ArrayObject.D1 aod1 = (ArrayObject.D1) attr.getValues();
225-
226-
// getSize returns a long
227-
final int aodSize = (int) aod1.getSize();
228-
dimNames = new String[aodSize];
229-
230-
for (int i = 0; i < aodSize; ++i) {
231-
dimNames[i] = (String) aod1.get(i);
227+
if (attr.getLength() == 1 && attr.getStringValue().equals("")) {
228+
// scalar array without a named dimension
229+
logger.debug(" {} is a scalar array without a named dimension", vname);
230+
} else {
231+
final ArrayObject.D1 aod1 = (ArrayObject.D1) attr.getValues();
232+
233+
// getSize returns a long
234+
final int aodSize = (int) aod1.getSize();
235+
dimNames = new String[aodSize];
236+
237+
for (int i = 0; i < aodSize; ++i) {
238+
dimNames[i] = (String) aod1.get(i);
239+
}
240+
hasNamedDimensions = true;
232241
}
233-
hasNamedDimensions = true;
234242
} catch (final Exception exc) {
235-
logger.debug(" Could not extract _ARRAY_DIMENSIONS for {}, {}", vname, exc.getMessage());
243+
logger.debug(" Could not extract {} for {}, {}", ARRAYDIMENSIONS, vname, exc.getMessage());
236244
}
237245
}
238246
}
@@ -260,7 +268,8 @@ private void makeVariable(RandomAccessDirectoryItem item, long dataOffset, ZArra
260268
final Dimension.Builder dim = Dimension.builder(dname, shape[i]);
261269
dim.setIsVariableLength(false);
262270
dim.setIsUnlimited(false);
263-
dim.setIsShared(false);
271+
// if using named dimensions from _ARRAY_DIMENSIONS, mark the dimension as shared
272+
dim.setIsShared(hasNamedDimensions);
264273

265274
final Dimension dd = dim.build();
266275

@@ -275,6 +284,9 @@ private void makeVariable(RandomAccessDirectoryItem item, long dataOffset, ZArra
275284
if (dd.getLength() != prevd.getLength()) {
276285
throw new ZarrFormatException("Named dimension " + dname + " seen with inconsistent lengths.");
277286
}
287+
// replace newly created dimension with the previously added dimension
288+
dims.remove(dd);
289+
dims.add(prevd);
278290
} else {
279291
logger.trace("adding {} to group as a shared dimension", dname);
280292
parentGroup.addDimension(dd);
@@ -327,7 +339,13 @@ private List<Attribute> makeAttributes(RandomAccessDirectoryItem item) {
327339
Attribute.Builder attr = Attribute.builder(key);
328340
Object val = attrMap.get(key);
329341
if (val instanceof Collection<?>) {
330-
attr.setValues(Arrays.asList(((Collection) val).toArray()), false);
342+
Collection<?> collection = (Collection<?>) val;
343+
if (collection.isEmpty() && key.equals(ARRAYDIMENSIONS)) {
344+
// scalar array
345+
attr.setValues(Collections.singletonList(""), false);
346+
} else {
347+
attr.setValues(Arrays.asList(collection.toArray()), false);
348+
}
331349
} else if (val instanceof Number) {
332350
attr.setNumericValue((Number) val, false);
333351
} else {
@@ -354,7 +372,8 @@ private static int getChunkIndex(RandomAccessDirectoryItem item, ZArray zarray)
354372

355373
int nDims = zarray.getShape().length;
356374
// verify is data file, else return -1
357-
String pattern = String.format("([0-9]+%c){%d}[0-9]+", zarray.getSeparator().charAt(0), nDims - 1);
375+
String pattern = String.format("([0-9]+%c){%d}[0-9]+", zarray.getSeparator().charAt(0), nDims == 0 ? 0 : nDims - 1);
376+
358377
if (!fileName.matches(pattern)) {
359378
return -1;
360379
}
@@ -363,14 +382,20 @@ private static int getChunkIndex(RandomAccessDirectoryItem item, ZArray zarray)
363382
String[] dims = fileName.split(String.format("\\%c", zarray.getSeparator().charAt(0)));
364383
int[] subs = Arrays.stream(dims).mapToInt(dim -> Integer.parseInt(dim)).toArray();
365384

366-
// get number of chunks in each dimension
367-
int[] nChunks = new int[nDims];
368-
int[] shape = zarray.getShape();
369-
int[] chunkSize = zarray.getChunks();
370-
for (int i = 0; i < nDims; i++) {
371-
nChunks[i] = (int) Math.ceil(shape[i] / chunkSize[i]);
385+
// find chunk number as a flat index
386+
if (nDims != 0) {
387+
// get number of chunks in each dimension
388+
int[] nChunks = new int[nDims];
389+
int[] shape = zarray.getShape();
390+
int[] chunkSize = zarray.getChunks();
391+
for (int i = 0; i < nDims; i++) {
392+
nChunks[i] = (int) Math.ceil(shape[i] / chunkSize[i]);
393+
}
394+
return ZarrUtils.subscriptsToIndex(subs, nChunks);
395+
} else {
396+
// scalar array
397+
return 0;
372398
}
373-
return ZarrUtils.subscriptsToIndex(subs, nChunks);
374399
}
375400

376401
/**

cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrLayoutBB.java

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
/*
2+
* Copyright (c) 2021-2025 University Corporation for Atmospheric Research/Unidata
3+
* See LICENSE for license information.
4+
*/
5+
16
package ucar.nc2.iosp.zarr;
27

38
import ucar.ma2.Range;
@@ -127,14 +132,20 @@ public LayoutBBTiled.DataChunk next() {
127132

128133
private void incrementChunk() {
129134
// increment index from inner dimension outward
130-
int i = this.currChunk.length - 1;
131-
while (this.currChunk[i] + 1 >= nChunks[i] && i > 0) {
132-
this.currChunk[i] = 0;
133-
i--;
135+
if (this.currChunk.length != 0) {
136+
int i = this.currChunk.length - 1;
137+
while (this.currChunk[i] + 1 >= nChunks[i] && i > 0) {
138+
this.currChunk[i] = 0;
139+
i--;
140+
}
141+
this.currChunk[i]++;
142+
this.currOffset += initializedChunks.getOrDefault(this.chunkNum, (long) 0);
143+
this.chunkNum = ZarrUtils.subscriptsToIndex(this.currChunk, nChunks);
144+
} else {
145+
// scalar array
146+
this.currOffset = 0;
147+
this.chunkNum = 0;
134148
}
135-
this.currChunk[i]++;
136-
this.currOffset += initializedChunks.getOrDefault(this.chunkNum, (long) 0);
137-
this.chunkNum = ZarrUtils.subscriptsToIndex(this.currChunk, nChunks);
138149
}
139150
}
140151

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Examples obtained from https://github.com/zarr-developers/geozarr-spec/tree/cnl-examples/ on 2025-04-28 (commit d1eabecbea63d6f300600d529d7818bbf6df5bcc).
2+
The examples are included here without modification.
3+
4+
Examples are licensed under Creative Commons Attribution 4.0 International.
5+
The full license text can be found at https://raw.githubusercontent.com/zarr-developers/geozarr-spec/refs/heads/cnl-examples/LICENSE
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"Conventions": "CF-1.8",
3+
"profile": [
4+
"time-series-raster",
5+
"scalar-raster"
6+
]
7+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"zarr_format": 2
3+
}
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
{
2+
"metadata": {
3+
".zattrs": {
4+
"Conventions": "CF-1.8",
5+
"profile": [
6+
"time-series-raster",
7+
"scalar-raster"
8+
]
9+
},
10+
".zgroup": {
11+
"zarr_format": 2
12+
},
13+
"spatial_ref/.zarray": {
14+
"chunks": [],
15+
"compressor": null,
16+
"dtype": "<i8",
17+
"fill_value": null,
18+
"filters": null,
19+
"order": "C",
20+
"shape": [],
21+
"zarr_format": 2
22+
},
23+
"spatial_ref/.zattrs": {
24+
"GeoTransform": "319281.23 12.5 0 5639331.75 0 -12.5",
25+
"_ARRAY_DIMENSIONS": [],
26+
"crs_wkt": "PROJCS[\"WGS 84 / UTM zone 30N\",GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AUTHORITY[\"EPSG\",\"4326\"]],PROJECTION[\"Transverse_Mercator\"],PARAMETER[\"latitude_of_origin\",0],PARAMETER[\"central_meridian\",-3],PARAMETER[\"scale_factor\",0.9996],PARAMETER[\"false_easting\",500000],PARAMETER[\"false_northing\",0],UNIT[\"metre\",1,AUTHORITY[\"EPSG\",\"9001\"]],AXIS[\"Easting\",EAST],AXIS[\"Northing\",NORTH],AUTHORITY[\"EPSG\",\"32630\"]]",
27+
"false_easting": 500000.0,
28+
"false_northing": 0.0,
29+
"geographic_crs_name": "WGS 84",
30+
"grid_mapping_name": "transverse_mercator",
31+
"horizontal_datum_name": "World Geodetic System 1984",
32+
"inverse_flattening": 298.257223563,
33+
"latitude_of_projection_origin": 0.0,
34+
"longitude_of_central_meridian": -3.0,
35+
"prime_meridian_name": "Greenwich",
36+
"projected_crs_name": "WGS 84 / UTM zone 30N",
37+
"reference_ellipsoid_name": "WGS 84",
38+
"scale_factor_at_central_meridian": 0.9996,
39+
"semi_major_axis": 6378137.0,
40+
"semi_minor_axis": 6356752.314245179
41+
},
42+
"temperature/.zarray": {
43+
"chunks": [
44+
2,
45+
128,
46+
128
47+
],
48+
"compressor": {
49+
"blocksize": 0,
50+
"clevel": 5,
51+
"cname": "lz4",
52+
"id": "blosc",
53+
"shuffle": 1
54+
},
55+
"dtype": "<f8",
56+
"fill_value": "NaN",
57+
"filters": null,
58+
"order": "C",
59+
"shape": [
60+
4,
61+
128,
62+
128
63+
],
64+
"zarr_format": 2
65+
},
66+
"temperature/.zattrs": {
67+
"AREA_OR_POINT": "Point",
68+
"_ARRAY_DIMENSIONS": [
69+
"time",
70+
"y",
71+
"x"
72+
],
73+
"grid_mapping": "spatial_ref",
74+
"standard_name": "air_temperature",
75+
"units": "K"
76+
},
77+
"time/.zarray": {
78+
"chunks": [
79+
4
80+
],
81+
"compressor": {
82+
"blocksize": 0,
83+
"clevel": 5,
84+
"cname": "lz4",
85+
"id": "blosc",
86+
"shuffle": 1
87+
},
88+
"dtype": "<i8",
89+
"fill_value": null,
90+
"filters": null,
91+
"order": "C",
92+
"shape": [
93+
4
94+
],
95+
"zarr_format": 2
96+
},
97+
"time/.zattrs": {
98+
"_ARRAY_DIMENSIONS": [
99+
"time"
100+
],
101+
"calendar": "proleptic_gregorian",
102+
"long_name": "time of observation",
103+
"standard_name": "time",
104+
"units": "days since 2020-01-01 00:00:00"
105+
},
106+
"x/.zarray": {
107+
"chunks": [
108+
128
109+
],
110+
"compressor": {
111+
"blocksize": 0,
112+
"clevel": 5,
113+
"cname": "lz4",
114+
"id": "blosc",
115+
"shuffle": 1
116+
},
117+
"dtype": "<f8",
118+
"fill_value": "NaN",
119+
"filters": null,
120+
"order": "C",
121+
"shape": [
122+
128
123+
],
124+
"zarr_format": 2
125+
},
126+
"x/.zattrs": {
127+
"_ARRAY_DIMENSIONS": [
128+
"x"
129+
],
130+
"long_name": "x coordinate of projection",
131+
"standard_name": "projection_x_coordinate",
132+
"units": "metre"
133+
},
134+
"y/.zarray": {
135+
"chunks": [
136+
128
137+
],
138+
"compressor": {
139+
"blocksize": 0,
140+
"clevel": 5,
141+
"cname": "lz4",
142+
"id": "blosc",
143+
"shuffle": 1
144+
},
145+
"dtype": "<f8",
146+
"fill_value": "NaN",
147+
"filters": null,
148+
"order": "C",
149+
"shape": [
150+
128
151+
],
152+
"zarr_format": 2
153+
},
154+
"y/.zattrs": {
155+
"_ARRAY_DIMENSIONS": [
156+
"y"
157+
],
158+
"long_name": "y coordinate of projection",
159+
"standard_name": "projection_y_coordinate",
160+
"units": "metre"
161+
}
162+
},
163+
"zarr_consolidated_format": 1
164+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"chunks": [],
3+
"compressor": null,
4+
"dtype": "<i8",
5+
"fill_value": null,
6+
"filters": null,
7+
"order": "C",
8+
"shape": [],
9+
"zarr_format": 2
10+
}

0 commit comments

Comments
 (0)