Skip to content

Commit 04f7db0

Browse files
authored
Merge pull request #761 from haileyajohnson/s3-perf
[5.x] fix s3 zarr performance issue
2 parents 8c26003 + f025d06 commit 04f7db0

9 files changed

Lines changed: 66 additions & 24 deletions

File tree

cdm/core/src/main/java/ucar/nc2/internal/iosp/hdf4/H4header.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@ public class H4header implements HdfHeaderIF {
5858
private static final long maxHeaderPos = 500000; // header's gotta be within this
5959

6060
static boolean isValidFile(ucar.unidata.io.RandomAccessFile raf) throws IOException {
61+
// fail fast on directory
62+
if (raf.isDirectory()) {
63+
return false;
64+
}
6165
long pos = 0;
6266
long size = raf.length();
6367

cdm/core/src/main/java/ucar/nc2/internal/iosp/hdf5/H5headerNew.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,10 @@ public static void setDebugFlags(ucar.nc2.util.DebugFlags debugFlag) {
121121
private static final boolean transformReference = true;
122122

123123
public static boolean isValidFile(RandomAccessFile raf) throws IOException {
124+
// fail fast on directory
125+
if (raf.isDirectory()) {
126+
return false;
127+
}
124128
// For HDF5, we need to search forward
125129
long filePos = 0;
126130
long size = raf.length();

cdm/core/src/main/java/ucar/nc2/internal/iosp/netcdf3/N3headerNew.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ public class N3headerNew {
3030
public static boolean debugHeaderSize; // see NetcdfFile.setDebugFlags
3131

3232
public static boolean isValidFile(ucar.unidata.io.RandomAccessFile raf) throws IOException {
33+
// fail fast on directory
34+
if (raf.isDirectory()) {
35+
return false;
36+
}
3337
switch (NetcdfFileFormat.findNetcdfFormatType(raf)) {
3438
case NETCDF3:
3539
case NETCDF3_64BIT_OFFSET:

cdm/core/src/main/java/ucar/nc2/stream/NcStreamIosp.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ public class NcStreamIosp extends AbstractIOServiceProvider {
4343
private static final boolean debug = false;
4444

4545
public boolean isValidFile(RandomAccessFile raf) throws IOException {
46+
// fail fast on directory
47+
if (raf.isDirectory()) {
48+
return false;
49+
}
4650
raf.seek(0);
4751
if (!readAndTest(raf, NcStream.MAGIC_START))
4852
return false; // must start with these 4 bytes

cdm/s3/src/main/java/thredds/filesystem/s3/ControllerS3.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@ public MFile next() {
325325
MFile nextMFile = null;
326326
try {
327327
CdmS3Uri newUri = bucketUri.resolveNewKey(object.key());
328-
nextMFile = new MFileS3(newUri);
328+
nextMFile = new MFileS3(newUri, object.size(), object.lastModified().toEpochMilli());
329329
} catch (URISyntaxException e) {
330330
logger.warn("Cannot create MFile for {} in bucket {}", object.key(), bucketUri.getBucket(), e);
331331
}

cdm/s3/src/main/java/thredds/inventory/s3/MFileS3.java

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,24 +34,41 @@ public class MFileS3 implements MFile {
3434
private final String key;
3535
private final String delimiter;
3636

37+
private long length;
38+
private long lastMod;
39+
3740
private Object auxInfo;
3841

42+
3943
public MFileS3(String s3Uri) throws IOException {
44+
this(s3Uri, -1, -1);
45+
}
46+
47+
public MFileS3(CdmS3Uri s3Uri) {
48+
this(s3Uri, -1, -1);
49+
}
50+
51+
public MFileS3(String s3Uri, long len, long lm) throws IOException {
4052
try {
4153
cdmS3Uri = new CdmS3Uri(s3Uri);
4254
} catch (URISyntaxException e) {
4355
throw new IOException("Unable to create a CdmS3Uri from: " + s3Uri, e);
4456
}
4557
key = getKey();
4658
delimiter = getDelimiter();
59+
length = len;
60+
lastMod = lm;
61+
4762
// This can take some time, so wait to execute until the first time headObjectResponse is accessed
4863
this.headObjectResponse = () -> getHeadObjectResponse();
4964
}
5065

51-
public MFileS3(CdmS3Uri s3Uri) {
66+
public MFileS3(CdmS3Uri s3Uri, long len, long lm) {
5267
cdmS3Uri = s3Uri;
5368
key = getKey();
5469
delimiter = getDelimiter();
70+
length = len;
71+
lastMod = lm;
5572
// This can take some time, so wait to execute until the first time headObjectResponse is accessed
5673
this.headObjectResponse = () -> getHeadObjectResponse();
5774
}
@@ -87,12 +104,25 @@ private HeadObjectResponse getHeadObjectResponse() {
87104

88105
@Override
89106
public long getLastModified() {
90-
return headObjectResponse.get().lastModified().toEpochMilli();
107+
// negative values indicate unavailable, get from head request
108+
return this.lastMod < 0 ? updateLastModified() : this.lastMod;
109+
}
110+
111+
/** Update last modified by fetching from a head request */
112+
public long updateLastModified() {
113+
this.lastMod = headObjectResponse.get().lastModified().toEpochMilli();
114+
return this.lastMod;
91115
}
92116

93117
@Override
94118
public long getLength() {
95-
return headObjectResponse.get().contentLength();
119+
return this.length < 0 ? updateLength() : this.length;
120+
}
121+
122+
/** Update file length by fetching from a head request */
123+
public long updateLength() {
124+
this.length = headObjectResponse.get().contentLength();
125+
return this.length;
96126
}
97127

98128
@Override

cdm/s3/src/test/java/thredds/inventory/s3/TestMFileS3.java

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -93,24 +93,21 @@ public void justBucketOsdc() throws IOException {
9393
//
9494
@Test
9595
public void bucketAndKeyAws() throws IOException {
96-
long lastModified = 1532465845000L;
97-
checkWithBucketAndKey(AWS_G16_S3_OBJECT_1, G16_OBJECT_KEY_1, null, lastModified);
98-
checkWithBucketAndKey(AWS_G16_S3_OBJECT_1 + DELIMITER_FRAGMENT, G16_NAME_1, "/", lastModified);
96+
checkWithBucketAndKey(AWS_G16_S3_OBJECT_1, G16_OBJECT_KEY_1, null);
97+
checkWithBucketAndKey(AWS_G16_S3_OBJECT_1 + DELIMITER_FRAGMENT, G16_NAME_1, "/");
9998
}
10099

101100
@Test
102101
public void bucketAndKeyGcs() throws IOException {
103-
long lastModified = 1504051532000L;
104-
checkWithBucketAndKey(GCS_G16_S3_OBJECT_1, G16_OBJECT_KEY_1, null, lastModified);
105-
checkWithBucketAndKey(GCS_G16_S3_OBJECT_1 + DELIMITER_FRAGMENT, G16_NAME_1, "/", lastModified);
102+
checkWithBucketAndKey(GCS_G16_S3_OBJECT_1, G16_OBJECT_KEY_1, null);
103+
checkWithBucketAndKey(GCS_G16_S3_OBJECT_1 + DELIMITER_FRAGMENT, G16_NAME_1, "/");
106104
}
107105

108106
@Test
109107
@Category(NotPullRequest.class)
110108
public void bucketAndKeyOsdc() throws IOException {
111-
long lastModified = 1611593614000L;
112-
checkWithBucketAndKey(OSDC_G16_S3_OBJECT_1, OSDC_G16_OBJECT_KEY_1, null, lastModified);
113-
checkWithBucketAndKey(OSDC_G16_S3_OBJECT_1 + DELIMITER_FRAGMENT, G16_NAME_1, "/", lastModified);
109+
checkWithBucketAndKey(OSDC_G16_S3_OBJECT_1, OSDC_G16_OBJECT_KEY_1, null);
110+
checkWithBucketAndKey(OSDC_G16_S3_OBJECT_1 + DELIMITER_FRAGMENT, G16_NAME_1, "/");
114111
}
115112

116113
@Test
@@ -184,8 +181,7 @@ private void checkWithBucket(String cdmS3Uri) throws IOException {
184181
assertThat(parent).isNull();
185182
}
186183

187-
private void checkWithBucketAndKey(String cdmS3Uri, String expectedName, String delimiter, long expectedLastModified)
188-
throws IOException {
184+
private void checkWithBucketAndKey(String cdmS3Uri, String expectedName, String delimiter) throws IOException {
189185
logger.info("Checking {}", cdmS3Uri);
190186
MFile mFile = new MFileS3(cdmS3Uri);
191187
assertThat(mFile.getPath()).isEqualTo(cdmS3Uri);
@@ -197,7 +193,6 @@ private void checkWithBucketAndKey(String cdmS3Uri, String expectedName, String
197193
assertThat(mFile.getParent()).isNull();
198194
}
199195
assertThat(mFile.isDirectory()).isFalse();
200-
assertThat(mFile.getLastModified()).isEqualTo(expectedLastModified);
201196
assertThat(mFile.getLength()).isEqualTo(G16_OBJECT_1_SIZE);
202197
}
203198

cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrHeader.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ public class ZarrHeader {
2929

3030
private final RandomAccessDirectory rootRaf;
3131
private final Group.Builder rootGroup;
32+
private final String rootLocation;
3233

3334
private List<Attribute> attrs;
3435

@@ -37,6 +38,7 @@ public class ZarrHeader {
3738
public ZarrHeader(RandomAccessDirectory raf, Group.Builder rootGroup) {
3839
this.rootRaf = raf;
3940
this.rootGroup = rootGroup;
41+
this.rootLocation = ZarrPathUtils.trimLocation(this.rootRaf.getLocation());
4042
this.attrs = null;
4143
}
4244

@@ -46,8 +48,7 @@ public ZarrHeader(RandomAccessDirectory raf, Group.Builder rootGroup) {
4648
* @throws IOException
4749
*/
4850
public void read() throws IOException {
49-
String location = ZarrPathUtils.trimLocation(this.rootRaf.getLocation());
50-
List<RandomAccessDirectoryItem> items = this.rootRaf.getFilesInPath(location);
51+
List<RandomAccessDirectoryItem> items = this.rootRaf.getFilesInPath(this.rootLocation);
5152
// because items are ordered alphabetically, it is safe to assume groups will be created before subgroups and vars
5253
// However, we need to assure we make attrs before their groups and vars
5354
// assumes no files will be read between .zarray and .zattrs
@@ -97,7 +98,7 @@ private void makeGroup(RandomAccessDirectoryItem item) throws ZarrFormatExceptio
9798
// make new Group
9899
Group.Builder group = Group.builder();
99100
String location = ZarrPathUtils.trimLocation(item.getLocation());
100-
if (location.equals(this.rootRaf.getLocation() + ZarrKeys.ZGROUP)) {
101+
if (location.equals(this.rootLocation + ZarrKeys.ZGROUP)) {
101102
group = this.rootGroup;
102103
}
103104
// set Group name
@@ -199,7 +200,7 @@ private List<Attribute> makeAttributes(RandomAccessDirectoryItem item) throws IO
199200
*/
200201
private Group.Builder findGroup(String location) throws ZarrFormatException {
201202
// set Group parent
202-
String groupName = ZarrPathUtils.getParentGroupNameFromPath(location, rootRaf.getLocation());
203+
String groupName = ZarrPathUtils.getParentGroupNameFromPath(location, this.rootLocation);
203204
return this.rootGroup.findGroupNested(groupName).orElseThrow(ZarrFormatException::new);
204205
}
205206

cdm/zarr/src/test/java/ucar/nc2/iosp/zarr/TestZarrIosp.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,19 +38,19 @@ public class TestZarrIosp {
3838
@Test
3939
public void testIsValidFile() throws IOException {
4040
ZarrIosp iosp = new ZarrIosp();
41+
// zarr stores
4142
assertThat(iosp.isValidFile(NetcdfFiles.getRaf(DIRECTORY_STORE_URI, -1))).isTrue();
4243
assertThat(iosp.isValidFile(NetcdfFiles.getRaf(ZIP_STORE_URI, -1))).isTrue();
43-
// TODO: S3 too slow for large set of objects
44-
// assertThat(iosp.isValidFile(NetcdfFiles.getRaf(OBJECT_STORE_ZARR_URI, -1))).isTrue();
45-
44+
assertThat(iosp.isValidFile(NetcdfFiles.getRaf(OBJECT_STORE_ZARR_URI, -1))).isTrue();
45+
// not zarr stores
4646
assertThat(iosp.isValidFile(NetcdfFiles.getRaf(DIRECTORY_STORE_URI + "/.zgroup", -1))).isFalse();
4747
}
4848

4949
@Test
5050
public void testBuildNcfile() throws IOException {
5151
_testBuildNcfile(DIRECTORY_STORE_URI);
5252
_testBuildNcfile(ZIP_STORE_URI);
53-
// _testBuildNcfile(OBJECT_STORE_ZARR_URI);
53+
_testBuildNcfile(OBJECT_STORE_ZARR_URI);
5454
}
5555

5656
private void _testBuildNcfile(String location) throws IOException {

0 commit comments

Comments
 (0)