Skip to content

Commit 3bce58a

Browse files
author
haileyajohnson
committed
update zarr test data
1 parent 71654bf commit 3bce58a

1,053 files changed

Lines changed: 252 additions & 322 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

cdm/zarr/src/test/data/scripts/make_zarr_dtype_test_data.py

Lines changed: 40 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -6,50 +6,50 @@
66

77
import numpy as np
88
# create data
9-
be_short_data = np.arange(100, dtype='>i2').reshape(10, 10)
10-
le_short_data = np.arange(100, dtype='<i2').reshape(10, 10)
11-
be_int_data = np.arange(100, dtype='>i4').reshape(10, 10)
12-
le_int_data = np.arange(100, dtype='<i4').reshape(10, 10)
9+
be_short_data = np.arange(20, dtype='>i2').reshape(4,5)
10+
le_short_data = np.arange(20, dtype='<i2').reshape(4,5)
11+
be_int_data = np.arange(20, dtype='>i4').reshape(4,5)
12+
le_int_data = np.arange(20, dtype='<i4').reshape(4,5)
1313

1414

1515
# In[ ]:
1616

1717

18-
be_long_data = np.arange(100, dtype='>i8').reshape(10, 10)
19-
le_long_data = np.arange(100, dtype='<i8').reshape(10, 10)
18+
be_long_data = np.arange(20, dtype='>i8').reshape(5,4)
19+
le_long_data = np.arange(20, dtype='<i8').reshape(5,4)
2020

2121

2222
# In[ ]:
2323

2424

25-
be_float_data = np.arange(200, dtype='>f4').reshape(20, 10)
26-
le_float_data = np.arange(200, dtype='<f4').reshape(20, 10)
25+
be_float_data = np.arange(20, dtype='>f4').reshape(4,5)
26+
le_float_data = np.arange(20, dtype='<f4').reshape(4,5)
2727

2828

2929
# In[ ]:
3030

3131

32-
be_double_data = np.arange(100, dtype='>f8').reshape(10, 10)
33-
le_double_data = np.arange(100, dtype='<f8').reshape(10, 10)
32+
be_double_data = np.arange(20, dtype='>f8').reshape(5,4)
33+
le_double_data = np.arange(20, dtype='<f8').reshape(5,4)
3434

3535

3636
# In[ ]:
3737

3838

39-
boolean_data = np.full((10, 4), [0, 1, 0, 1], dtype='|b1')
39+
boolean_data = np.full((4,5), [0, 1, 0, 1,0], dtype='|b1')
4040

4141

4242
# In[ ]:
4343

4444

4545
byte_data = be_int_data.tobytes()
46-
bdata = np.frombuffer(byte_data, dtype='|i1').reshape(20, 20);
46+
bdata = np.frombuffer(byte_data, dtype='|i1').reshape(10,8);
4747

4848

4949
# In[ ]:
5050

5151

52-
charar = np.chararray((10, 12), itemsize=4)
52+
charar = np.chararray((10,12), itemsize=4)
5353
charar[:] = 'abcd'
5454

5555

@@ -65,83 +65,83 @@
6565

6666
root_grp = zarr.group(store, overwrite=True)
6767
# create a group for byte-order independent data types
68-
unordered_group = root_grp.create_group('unordered_group')
68+
unordered_group = root_grp.create_group('unordered_group', overwrite=True)
6969

7070

7171
# create a group for byte-ordered data types
72-
byte_ordered_group = root_grp.create_group('byte_ordered_group')
72+
byte_ordered_group = root_grp.create_group('byte_ordered_group', overwrite=True)
7373
# add groups for big and little endian
74-
big_endian = byte_ordered_group.create_group('big_endian')
75-
little_endian = byte_ordered_group.create_group('little_endian')
74+
big_endian = byte_ordered_group.create_group('big_endian', overwrite=True)
75+
little_endian = byte_ordered_group.create_group('little_endian', overwrite=True)
7676

7777
# create group for string types
78-
string_group = root_grp.create_group('string_types')
78+
string_group = root_grp.create_group('string_types', overwrite=True)
7979

8080

8181
# In[ ]:
8282

8383

8484
# add data to unordered group
85-
b = unordered_group.create_dataset('boolean_data', shape=(10, 4), chunks=(2, 2), dtype='|b1', overwrite=True)
85+
b = unordered_group.create_dataset('boolean_data', shape=(4,5), chunks=(2,5), dtype='|b1', overwrite=True, compressor=None)
8686
b[:] = boolean_data
87-
byte = unordered_group.create_dataset('byte_data', shape=(20, 20), chunks=(3, 3), dtype='|i1', overwrite=True)
87+
byte = unordered_group.create_dataset('byte_data', shape=(10,8), chunks=(5,4), dtype='|i1', overwrite=True, compressor=None)
8888
byte[:] = bdata
89-
ubyte = unordered_group.create_dataset('ubyte_data', shape=(20, 20), chunks=(4, 5), dtype='|u1', overwrite=True)
89+
ubyte = unordered_group.create_dataset('ubyte_data', shape=(10,8), chunks=(5,4), dtype='|u1', overwrite=True, compressor=None)
9090
ubyte[:] = bdata
9191

9292

9393
# In[ ]:
9494

9595

9696
# add data to big endian group
97-
shorts = big_endian.create_dataset('short_data', shape = (10, 10), chunks=(10, 5), dtype='>i2', overwrite=True)
97+
shorts = big_endian.create_dataset('short_data', shape=(4,5), chunks=(2,5), dtype='>i2', overwrite=True, compressor=None)
9898
shorts[:] = be_short_data
99-
ushorts = big_endian.create_dataset('ushort_data', shape = (10, 10), chunks=(10, 5), dtype='>u2', overwrite=True)
99+
ushorts = big_endian.create_dataset('ushort_data', shape=(4,5), chunks=(2,5), dtype='>u2', overwrite=True, compressor=None)
100100
ushorts[:] = be_short_data
101-
ints = big_endian.create_dataset('int_data', shape = (10, 10), chunks=(10, 5), dtype='>i4', overwrite=True)
101+
ints = big_endian.create_dataset('int_data', shape=(4,5), chunks=(2,5), dtype='>i4', overwrite=True, compressor=None)
102102
ints[:] = be_int_data
103-
uints = big_endian.create_dataset('uint_data', shape = (10, 10), chunks=(10, 5), dtype='>u4', overwrite=True)
103+
uints = big_endian.create_dataset('uint_data', shape=(4,5), chunks=(2,5), dtype='>u4', overwrite=True, compressor=None)
104104
uints[:] = be_int_data
105-
longs = big_endian.create_dataset('long_data', shape = (10, 10), chunks=(5, 10), dtype='>i8', overwrite=True)
105+
longs = big_endian.create_dataset('long_data', shape=(5,4), chunks=(5,2), dtype='>i8', overwrite=True, compressor=None)
106106
longs[:] = be_long_data
107-
ulongs = big_endian.create_dataset('ulong_data', shape = (10, 10), chunks=(5, 10), dtype='>u8', overwrite=True)
107+
ulongs = big_endian.create_dataset('ulong_data', shape=(5,4), chunks=(5,2), dtype='>u8', overwrite=True, compressor=None)
108108
ulongs[:] = be_long_data
109-
floats = big_endian.create_dataset('float_data', shape = (20, 10), chunks=(5, 5), dtype='>f4', overwrite=True)
109+
floats = big_endian.create_dataset('float_data', shape=(4,5), chunks=(2,5), dtype='>f4', overwrite=True, compressor=None)
110110
floats[:] = be_float_data
111-
doubles = big_endian.create_dataset('double_data', shape = (10, 10), chunks=(10, 10), dtype='>f8', overwrite=True)
111+
doubles = big_endian.create_dataset('double_data', shape=(5,4), chunks=(5,2), dtype='>f8', overwrite=True, compressor=None)
112112
doubles[:] = be_double_data
113113

114114

115115
# In[ ]:
116116

117117

118118
# add data to little endian group
119-
shorts = little_endian.create_dataset('short_data', shape = (10, 10), chunks=(10, 5), dtype='<i2', overwrite=True)
119+
shorts = little_endian.create_dataset('short_data', shape=(4,5), chunks=(2,5), dtype='<i2', overwrite=True, compressor=None)
120120
shorts[:] = le_short_data
121-
ushorts = little_endian.create_dataset('ushort_data', shape = (10, 10), chunks=(10, 5), dtype='<u2', overwrite=True)
121+
ushorts = little_endian.create_dataset('ushort_data', shape=(4,5), chunks=(2,5), dtype='<u2', overwrite=True, compressor=None)
122122
ushorts[:] = le_short_data
123-
ints = little_endian.create_dataset('int_data', shape = (10, 10), chunks=(10, 5), dtype='<i4', overwrite=True)
123+
ints = little_endian.create_dataset('int_data', shape=(4,5), chunks=(2,5), dtype='<i4', overwrite=True, compressor=None)
124124
ints[:] = le_int_data
125-
uints = little_endian.create_dataset('uint_data', shape = (10, 10), chunks=(10, 5), dtype='<u4', overwrite=True)
125+
uints = little_endian.create_dataset('uint_data', shape=(4,5), chunks=(2,5), dtype='<u4', overwrite=True, compressor=None)
126126
uints[:] = le_int_data
127-
longs = little_endian.create_dataset('long_data', shape = (10, 10), chunks=(5, 10), dtype='<i8', overwrite=True)
127+
longs = little_endian.create_dataset('long_data', shape=(5,4), chunks=(5,2), dtype='<i8', overwrite=True, compressor=None)
128128
longs[:] = le_long_data
129-
ulongs = little_endian.create_dataset('ulong_data', shape = (10, 10), chunks=(5, 10), dtype='<u8', overwrite=True)
129+
ulongs = little_endian.create_dataset('ulong_data', shape=(5,4), chunks=(5,2), dtype='<u8', overwrite=True, compressor=None)
130130
ulongs[:] = le_long_data
131-
floats = little_endian.create_dataset('float_data', shape = (20, 10), chunks=(5, 5), dtype='<f4', overwrite=True)
131+
floats = little_endian.create_dataset('float_data', shape=(4,5), chunks=(2,5), dtype='<f4', overwrite=True, compressor=None)
132132
floats[:] = le_float_data
133-
doubles = little_endian.create_dataset('double_data', shape = (10, 10), chunks=(10, 10), dtype='<f8', overwrite=True)
133+
doubles = little_endian.create_dataset('double_data', shape=(5,4), chunks=(5,2), dtype='<f8', overwrite=True, compressor=None)
134134
doubles[:] = le_double_data
135135

136136

137137
# In[ ]:
138138

139139

140140
# add string data
141-
chars = string_group.create_dataset('char_data', shape=(10,12), chunks=(2,4), dtype='S1', overwrite=True)
141+
chars = string_group.create_dataset('char_data', shape=(10,12), chunks=(5,3), dtype='S1', overwrite=True, compressor=None)
142142
chars[:] = charar
143-
strs = string_group.create_dataset('str_data', shape=(10,12), chunks=(2,2), dtype='S4', overwrite=True)
143+
strs = string_group.create_dataset('str_data', shape=(10,12), chunks=(5,6), dtype='S4', overwrite=True, compressor=None)
144144
strs[:] = charar
145-
unicode = string_group.create_dataset('unicode_data', shape=(10,12), chunks=(2,2), dtype='U4', overwrite=True)
145+
unicode = string_group.create_dataset('unicode_data', shape=(10,12), chunks=(5,6), dtype='U4', overwrite=True, compressor=None)
146146
unicode[:] = charar
147147

cdm/zarr/src/test/data/scripts/make_zarr_test_data.py

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,13 @@
77
import zarr
88
## toggle comment to switch between directory store and zip store
99
store = zarr.DirectoryStore('../zarr_test_data.zarr')
10-
# store = zarr.ZipStore('../zarr_test_data.zip')
10+
# store = zarr.ZipStore('../zarr_test_data.zip', mode='w')
1111
root_grp = zarr.group(store, overwrite=True)
1212

1313

1414
# In[ ]:
1515

1616

17-
# make array without group and uninitialized data, set fill_value
18-
a = zarr.create(shape=(20, 20), chunks=(10, 10), dtype='f4', fill_value=999.0, store=store, overwrite=True)
19-
a[:] = 0
20-
21-
22-
# In[ ]:
23-
24-
2517
# make group with '/' separator and 'F' order
2618
attrs_grp = root_grp.create_group('group_with_attrs', overwrite=True)
2719

@@ -37,7 +29,7 @@
3729

3830

3931
# add array to group with 'F' order
40-
a = attrs_grp.create_dataset('F_order_array', shape=(20, 20), chunks=(4, 5), dtype='i4', order='F', overwrite=True)
32+
a = attrs_grp.create_dataset('F_order_array', shape=(20, 20), chunks=(4, 5), dtype='i4', order='F', overwrite=True, compressor=None)
4133

4234

4335
# In[ ]:
@@ -61,6 +53,32 @@
6153
# In[ ]:
6254

6355

56+
# create uninitialized array
57+
a = attrs_grp.create_dataset('uninitialized', shape=(20, 20), chunks=(10, 10), dtype='f4', fill_value=999.0, overwrite=True, compressor=None)
58+
59+
60+
# In[ ]:
61+
62+
63+
# create partially initialized arrays
64+
a = attrs_grp.create_dataset('partial_fill1', shape=(20, 20), chunks=(10, 10), dtype='f4', fill_value=999.0, overwrite=True, compressor=None)
65+
a[0:10,0:10]=0
66+
a = attrs_grp.create_dataset('partial_fill2', shape=(20, 20), chunks=(10, 10), dtype='f4', fill_value=999.0, overwrite=True, compressor=None)
67+
a[15:20,10:20]=0
68+
69+
70+
# In[ ]:
71+
72+
73+
# create nested arrays
74+
# dimension_separator keyword does not work for now, data is manually edited
75+
a = attrs_grp.create_dataset('nested', shape=(20, 20), chunks=(10, 10), dtype='i2', overwrite=True, compressor=None) #, dimension_separator='/')
76+
a[:]=0
77+
78+
79+
# In[ ]:
80+
81+
6482
# make group for multidimensional data
6583
dims_grp = root_grp.create_group('group_with_dims', overwrite=True)
6684

@@ -69,7 +87,7 @@
6987

7088

7189
# add 1D array
72-
a1 = dims_grp.create_dataset('var1D', shape=(20,), chunks=(4,), dtype='i4', overwrite=True)
90+
a1 = dims_grp.create_dataset('var1D', shape=(20,), chunks=(5,), dtype='i4', overwrite=True, compressor=None)
7391
data = np.arange(20)
7492
a1[:] = data
7593

@@ -78,23 +96,29 @@
7896

7997

8098
# add 2D array
81-
a2 = dims_grp.create_dataset('var2D', shape=(20,20), chunks=(4,4), dtype='i4', overwrite=True)
99+
a2 = dims_grp.create_dataset('var2D', shape=(20,20), chunks=(5,5), dtype='i4', overwrite=True, compressor=None)
82100
a2[:] = np.tile(data, (20,1))
83101

84102

85103
# In[ ]:
86104

87105

88106
# add 3D array
89-
a3 = dims_grp.create_dataset('var3D', shape=(20,20,20), chunks=(4,4,4), dtype='i4', overwrite=True)
107+
a3 = dims_grp.create_dataset('var3D', shape=(20,20,20), chunks=(5,5,5), dtype='i4', overwrite=True, compressor=None)
90108
a3[:] = np.tile(data, (20,20,1))
91109

92110

93111
# In[ ]:
94112

95113

96114
# add 4D array
97-
a4 = dims_grp.create_dataset('var4D', shape=(20,20,20,20), chunks=(4,4,4,4), dtype='i4', overwrite=True)
115+
a4 = dims_grp.create_dataset('var4D', shape=(20,20,20,20), chunks=(5,5,5,5), dtype='i4', overwrite=True, compressor=None)
98116
a4[:] = np.tile(data, (20,20,20,1))
99-
a[:]
117+
118+
119+
# In[ ]:
120+
121+
122+
## uncomment for zip store
123+
# store.close()
100124

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,16 @@
11
{
22
"chunks": [
3-
10,
4-
10
3+
5,
4+
2
55
],
6-
"compressor": {
7-
"blocksize": 0,
8-
"clevel": 5,
9-
"cname": "lz4",
10-
"id": "blosc",
11-
"shuffle": 1
12-
},
6+
"compressor": null,
137
"dtype": ">f8",
148
"fill_value": 0.0,
159
"filters": null,
1610
"order": "C",
1711
"shape": [
18-
10,
19-
10
12+
5,
13+
4
2014
],
2115
"zarr_format": 2
2216
}
Binary file not shown.
Binary file not shown.
Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,16 @@
11
{
22
"chunks": [
3-
5,
3+
2,
44
5
55
],
6-
"compressor": {
7-
"blocksize": 0,
8-
"clevel": 5,
9-
"cname": "lz4",
10-
"id": "blosc",
11-
"shuffle": 1
12-
},
6+
"compressor": null,
137
"dtype": ">f4",
148
"fill_value": 0.0,
159
"filters": null,
1610
"order": "C",
1711
"shape": [
18-
20,
19-
10
12+
4,
13+
5
2014
],
2115
"zarr_format": 2
2216
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)