Skip to content

Commit 5cc4025

Browse files
authored
Classifier (#1353)
* the best version so far * matches python mostly * before major change, kinda works * long one, all numpy stuff * classifier v1 * Seems to be working * style check * removed some not used stuff * add additional convert methods * some test fixes * overhaul * changes from code review comments
1 parent b5ef7fa commit 5cc4025

7 files changed

Lines changed: 307 additions & 3 deletions

File tree

cdm/core/src/main/java/ucar/nc2/constants/CDM.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ public class CDM {
5858
public static final String RUNTIME_COORDINATE = "runtimeCoordinate";
5959
public static final String STANDARDIZE = "standardize";
6060
public static final String NORMALIZE = "normalize";
61+
public static final String CLASSIFY = "classify";
6162

6263
// Special attributes
6364

cdm/core/src/main/java/ucar/nc2/dataset/NetcdfDataset.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,16 @@ public enum Enhance {
125125
* If the enhanced data type is not {@code FLOAT} or {@code DOUBLE}, this has no effect.
126126
*/
127127
ApplyNormalizer,
128+
/**
129+
* Classify numeric values by sign into 0 or 1:
130+
* {@code x < 0 --> 0, x >= 0 --> 1}
131+
*/
132+
ApplyClassifier,
128133
}
129134

130-
private static Set<Enhance> EnhanceAll =
131-
Collections.unmodifiableSet(EnumSet.of(Enhance.ConvertEnums, Enhance.ConvertUnsigned, Enhance.ApplyScaleOffset,
132-
Enhance.ConvertMissing, Enhance.CoordSystems, Enhance.ApplyStandardizer, Enhance.ApplyNormalizer));
135+
private static Set<Enhance> EnhanceAll = Collections.unmodifiableSet(
136+
EnumSet.of(Enhance.ConvertEnums, Enhance.ConvertUnsigned, Enhance.ApplyScaleOffset, Enhance.ConvertMissing,
137+
Enhance.CoordSystems, Enhance.ApplyStandardizer, Enhance.ApplyNormalizer, Enhance.ApplyClassifier));
133138
private static Set<Enhance> EnhanceNone = Collections.unmodifiableSet(EnumSet.noneOf(Enhance.class));
134139
private static Set<Enhance> defaultEnhanceMode = EnhanceAll;
135140

cdm/core/src/main/java/ucar/nc2/dataset/VariableDS.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,9 @@ Array convert(Array data, Set<NetcdfDataset.Enhance> enhancements) {
289289
if (enhancements.contains(Enhance.ApplyNormalizer) && normalizer != null) {
290290
toApply.add(normalizer);
291291
}
292+
if (enhancements.contains(Enhance.ApplyClassifier) && classifier != null) {
293+
toApply.add(classifier);
294+
}
292295

293296
double[] dataArray = (double[]) data.get1DJavaArray(DataType.DOUBLE);
294297

@@ -865,6 +868,7 @@ public Array convert(Array in, boolean convertUnsigned, boolean applyScaleOffset
865868
private ScaleOffset scaleOffset;
866869
private Standardizer standardizer;
867870
private Normalizer normalizer;
871+
private Classifier classifier;
868872
private ConvertMissing convertMissing;
869873
private Set<Enhance> enhanceMode = EnumSet.noneOf(Enhance.class); // The set of enhancements that were made.
870874

@@ -939,6 +943,10 @@ private void createEnhancements() {
939943
if (normalizerAtt != null && this.enhanceMode.contains(Enhance.ApplyNormalizer) && dataType.isFloatingPoint()) {
940944
this.normalizer = Normalizer.createFromVariable(this);
941945
}
946+
Attribute classifierAtt = findAttribute(CDM.CLASSIFY);
947+
if (classifierAtt != null && this.enhanceMode.contains(Enhance.ApplyClassifier) && dataType.isNumeric()) {
948+
this.classifier = Classifier.createFromVariable(this);
949+
}
942950
}
943951

944952
public Builder<?> toBuilder() {
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
package ucar.nc2.filter;
2+
3+
import java.io.IOException;
4+
import ucar.ma2.Array;
5+
import ucar.ma2.DataType;
6+
import ucar.ma2.IndexIterator;
7+
import ucar.nc2.dataset.VariableDS;
8+
9+
public class Classifier implements Enhancement {
10+
private Classifier classifier = null;
11+
private static Classifier emptyClassifier;
12+
private int classifiedVal;
13+
private int[] classifiedArray;
14+
15+
public static Classifier createFromVariable(VariableDS var) {
16+
try {
17+
Array arr = var.read();
18+
// DataType type = var.getDataType();
19+
return emptyClassifier();
20+
} catch (IOException e) {
21+
return emptyClassifier();
22+
}
23+
}
24+
25+
public static Classifier emptyClassifier() {
26+
emptyClassifier = new Classifier();
27+
return emptyClassifier;
28+
}
29+
30+
/** Enough of a constructor */
31+
public Classifier() {}
32+
33+
/** Classify double array */
34+
public int[] classifyDoubleArray(Array arr) {
35+
int[] classifiedArray = new int[(int) arr.getSize()];
36+
int i = 0;
37+
IndexIterator iterArr = arr.getIndexIterator();
38+
while (iterArr.hasNext()) {
39+
Number value = (Number) iterArr.getObjectNext();
40+
if (!Double.isNaN(value.doubleValue())) {
41+
42+
classifiedArray[i] = classifyArray(value.doubleValue());
43+
}
44+
i++;
45+
}
46+
return classifiedArray;
47+
}
48+
49+
50+
51+
/** for a single double */
52+
public int classifyArray(double val) {
53+
if (val >= 0) {
54+
classifiedVal = 1;
55+
} else {
56+
classifiedVal = 0;
57+
}
58+
59+
return classifiedVal;
60+
}
61+
62+
@Override
63+
public double convert(double val) {
64+
return emptyClassifier.classifyArray(val);
65+
}
66+
67+
68+
}
69+
70+
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!--
3+
~ Copyright (c) 1998-2023 University Corporation for Atmospheric Research/Unidata
4+
~ See LICENSE for license information.
5+
-->
6+
7+
<netcdf xmlns="http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2" enhance="all">
8+
9+
<variable name="doublePositives" shape="5" type="double">
10+
<attribute name="classify"/>
11+
<values>1.0 2.0 3.0 4.0 5.0</values>
12+
</variable>
13+
14+
<variable name="doubleNegatives" shape="5" type="double">
15+
<attribute name="classify"/>
16+
<values>-1.0 -2.0 -3.0 -4.0 -5.0</values>
17+
</variable>
18+
19+
<variable name="doubleMix" shape="5" type="double">
20+
<attribute name="classify"/>
21+
<values>1.0 -2.0 0.0 4.0 -5.0</values>
22+
</variable>
23+
24+
<variable name="floatPositives" shape="5" type="float">
25+
<attribute name="classify"/>
26+
<values>1.0 2.0 3.0 4.0 5.0</values>
27+
</variable>
28+
29+
<variable name="floatNegatives" shape="5" type="float">
30+
<attribute name="classify"/>
31+
<values>-1.0 -2.0 -3.0 -4.0 -5.0</values>
32+
</variable>
33+
<variable name="floatMix" shape="5" type="float">
34+
<attribute name="classify"/>
35+
<values>1.0 -2.0 0.0 4.0 -5.0</values>
36+
</variable>
37+
38+
<variable name="intPositives" shape="5" type="int">
39+
<attribute name="classify"/>
40+
<values>1 2 3 4 5</values>
41+
</variable>
42+
43+
<variable name="intNegatives" shape="5" type="int">
44+
<attribute name="classify"/>
45+
<values>-1.0 -2.0 -3.0 -4.0 -5.0</values>
46+
</variable>
47+
<variable name="intMix" shape="5" type="int">
48+
<attribute name="classify"/>
49+
<values>1.0 -2.0 0.0 4.0 -5.0</values>
50+
</variable>
51+
52+
53+
54+
</netcdf>
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
package ucar.nc2.filter;
2+
3+
import static org.junit.Assert.*;
4+
import org.junit.Test;
5+
import ucar.ma2.Array;
6+
7+
8+
public class TestClassifier {
9+
10+
11+
12+
/** test doubles */
13+
@Test
14+
public void testClassifyDoubleArray_AllPositive() {
15+
Classifier classifier = new Classifier();
16+
double[] input = {1.1, 2.2, 3.3};
17+
int[] expected = {1, 1, 1};
18+
Array DATA = Array.makeFromJavaArray(input);
19+
assertArrayEquals(expected, classifier.classifyDoubleArray(DATA));
20+
}
21+
22+
@Test
23+
public void testClassifyDoubleArray_AllNegative() {
24+
Classifier classifier = new Classifier();
25+
double[] input = {-1.1, -2.2, -3.3};
26+
int[] expected = {0, 0, 0};
27+
Array DATA = Array.makeFromJavaArray(input);
28+
assertArrayEquals(expected, classifier.classifyDoubleArray(DATA));
29+
}
30+
31+
@Test
32+
public void testClassifyDoubleArray_Mixed() {
33+
Classifier classifier = new Classifier();
34+
double[] input = {-1.1, 2.2, -3.3, 4.4};
35+
int[] expected = {0, 1, 0, 1};
36+
Array DATA = Array.makeFromJavaArray(input);
37+
assertArrayEquals(expected, classifier.classifyDoubleArray(DATA));
38+
}
39+
40+
@Test
41+
public void testClassifyDoubleArray_WithZero() {
42+
Classifier classifier = new Classifier();
43+
double[] input = {0.0, -1.1, 1.1, 0.0, 0.0, 0.0};
44+
int[] expected = {1, 0, 1, 1, 1, 1};
45+
Array DATA = Array.makeFromJavaArray(input);
46+
assertArrayEquals(expected, classifier.classifyDoubleArray(DATA));
47+
}
48+
49+
50+
51+
}
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
package ucar.nc2.ncml;
2+
3+
import static com.google.common.truth.Truth.assertThat;
4+
import static ucar.ma2.MAMath.nearlyEquals;
5+
6+
import java.io.IOException;
7+
import org.junit.Test;
8+
import ucar.ma2.Array;
9+
import ucar.ma2.DataType;
10+
import ucar.nc2.NetcdfFile;
11+
import ucar.nc2.Variable;
12+
import ucar.nc2.dataset.NetcdfDatasets;
13+
import ucar.unidata.util.test.TestDir;
14+
15+
public class TestEnhanceClassifier {
16+
17+
private static String dataDir = TestDir.cdmLocalTestDataDir + "ncml/enhance/";
18+
19+
public static final int[] all_ones = {1, 1, 1, 1, 1};
20+
public static final Array DATA_all_ones = Array.makeFromJavaArray(all_ones);
21+
public static final int[] all_zeroes = {0, 0, 0, 0, 0};
22+
public static final Array DATA_all_zeroes = Array.makeFromJavaArray(all_zeroes);
23+
public static final int[] mixNumbers = {1, 0, 1, 1, 0};
24+
public static final Array DATA_mixNumbers = Array.makeFromJavaArray(mixNumbers);
25+
26+
27+
/** test on doubles, all positives, all negatives and a mixed array */
28+
@Test
29+
public void testEnhanceClassifier_doubles() throws IOException {
30+
try (NetcdfFile ncfile = NetcdfDatasets.openDataset(dataDir + "testClassifier.ncml", true, null)) {
31+
Variable doublePositives = ncfile.findVariable("doublePositives");
32+
assertThat((Object) doublePositives).isNotNull();
33+
assertThat(doublePositives.getDataType()).isEqualTo(DataType.DOUBLE);
34+
assertThat(doublePositives.attributes().hasAttribute("classify")).isTrue();
35+
Array dataDoubles = doublePositives.read();
36+
assertThat(nearlyEquals(dataDoubles, DATA_all_ones)).isTrue();
37+
38+
Variable doubleNegatives = ncfile.findVariable("doubleNegatives");
39+
assertThat((Object) doubleNegatives).isNotNull();
40+
assertThat(doubleNegatives.getDataType()).isEqualTo(DataType.DOUBLE);
41+
assertThat(doubleNegatives.attributes().hasAttribute("classify")).isTrue();
42+
Array datadoubleNegatives = doubleNegatives.read();
43+
assertThat(nearlyEquals(datadoubleNegatives, DATA_all_zeroes)).isTrue();
44+
45+
Variable doubleMix = ncfile.findVariable("doubleMix");
46+
assertThat((Object) doubleMix).isNotNull();
47+
assertThat(doubleMix.getDataType()).isEqualTo(DataType.DOUBLE);
48+
assertThat(doubleMix.attributes().hasAttribute("classify")).isTrue();
49+
Array datadoubleMix = doubleMix.read();
50+
assertThat(nearlyEquals(datadoubleMix, DATA_mixNumbers)).isTrue();
51+
52+
}
53+
54+
55+
}
56+
57+
/** test on floats, all positives, all negatives and a mixed array */
58+
@Test
59+
public void testEnhanceClassifier_floats() throws IOException {
60+
try (NetcdfFile ncfile = NetcdfDatasets.openDataset(dataDir + "testClassifier.ncml", true, null)) {
61+
62+
Variable floatPositives = ncfile.findVariable("floatPositives");
63+
assertThat((Object) floatPositives).isNotNull();
64+
assertThat(floatPositives.getDataType()).isEqualTo(DataType.FLOAT);
65+
assertThat(floatPositives.attributes().hasAttribute("classify")).isTrue();
66+
Array datafloats = floatPositives.read();
67+
assertThat(nearlyEquals(datafloats, DATA_all_ones)).isTrue();
68+
69+
Variable floatNegatives = ncfile.findVariable("floatNegatives");
70+
assertThat((Object) floatNegatives).isNotNull();
71+
assertThat(floatNegatives.getDataType()).isEqualTo(DataType.FLOAT);
72+
assertThat(floatNegatives.attributes().hasAttribute("classify")).isTrue();
73+
Array datafloatNegatives = floatNegatives.read();
74+
assertThat(nearlyEquals(datafloatNegatives, DATA_all_zeroes)).isTrue();
75+
76+
Variable floatMix = ncfile.findVariable("floatMix");
77+
assertThat((Object) floatMix).isNotNull();
78+
assertThat(floatMix.getDataType()).isEqualTo(DataType.FLOAT);
79+
assertThat(floatMix.attributes().hasAttribute("classify")).isTrue();
80+
Array datafloatsMix = floatMix.read();
81+
assertThat(nearlyEquals(datafloatsMix, DATA_mixNumbers)).isTrue();
82+
83+
}
84+
85+
}
86+
87+
/** enhance is not applied to Integers, so we expect the same values after application */
88+
@Test
89+
public void testEnhanceClassifier_integers() throws IOException {
90+
91+
try (NetcdfFile ncfile = NetcdfDatasets.openDataset(dataDir + "testClassifier.ncml", true, null)) {
92+
Variable IntegerPositives = ncfile.findVariable("intPositives");
93+
assertThat((Object) IntegerPositives).isNotNull();
94+
assertThat(IntegerPositives.getDataType()).isEqualTo(DataType.INT);
95+
assertThat(IntegerPositives.attributes().hasAttribute("classify")).isTrue();
96+
Array dataIntegers = IntegerPositives.read();
97+
assertThat(nearlyEquals(dataIntegers, DATA_all_ones)).isTrue();
98+
99+
Variable intNegatives = ncfile.findVariable("intNegatives");
100+
assertThat((Object) intNegatives).isNotNull();
101+
assertThat(intNegatives.getDataType()).isEqualTo(DataType.INT);
102+
assertThat(intNegatives.attributes().hasAttribute("classify")).isTrue();
103+
Array dataintNegatives = intNegatives.read();
104+
assertThat(nearlyEquals(dataintNegatives, DATA_all_zeroes)).isTrue();
105+
106+
Variable intMix = ncfile.findVariable("intMix");
107+
assertThat((Object) intMix).isNotNull();
108+
assertThat(intMix.getDataType()).isEqualTo(DataType.INT);
109+
assertThat(intMix.attributes().hasAttribute("classify")).isTrue();
110+
Array dataintMix = intMix.read();
111+
assertThat(nearlyEquals(dataintMix, DATA_mixNumbers)).isTrue();
112+
}
113+
114+
}
115+
}

0 commit comments

Comments
 (0)