Skip to content

Commit 12a018c

Browse files
committed
Use SMSD path fingerprints in ReactionFingerprinter and ReactionContainer
Replace CDK CircularFingerprinter/Fingerprinter with SMSD SearchEngine.pathFingerprint in ReactionFingerprinter and ReactionContainer for lighter CDK dependency. GameTheoryEngine and StandardizeReaction retain CDK ECFP4 where threshold-sensitive similarity and deterministic cache keys are required. All 163 tests pass.
1 parent 54e5823 commit 12a018c

3 files changed

Lines changed: 31 additions & 19 deletions

File tree

pom.xml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -151,11 +151,6 @@
151151
<artifactId>cdk-isomorphism</artifactId>
152152
<version>${cdk.version}</version>
153153
</dependency>
154-
<dependency>
155-
<groupId>org.openscience.cdk</groupId>
156-
<artifactId>cdk-fingerprint</artifactId>
157-
<version>${cdk.version}</version>
158-
</dependency>
159154
<dependency>
160155
<groupId>org.openscience.cdk</groupId>
161156
<artifactId>cdk-atomtype</artifactId>
@@ -186,6 +181,11 @@
186181
<artifactId>cdk-inchi</artifactId>
187182
<version>${cdk.version}</version>
188183
</dependency>
184+
<dependency>
185+
<groupId>org.openscience.cdk</groupId>
186+
<artifactId>cdk-fingerprint</artifactId>
187+
<version>${cdk.version}</version>
188+
</dependency>
189189

190190
<dependency>
191191
<groupId>com.bioinceptionlabs</groupId>

src/main/java/com/bioinceptionlabs/reactionblast/fingerprints/ReactionFingerprinter.java

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
import java.util.BitSet;
2323
import org.openscience.cdk.Reaction;
2424
import org.openscience.cdk.exception.CDKException;
25-
import org.openscience.cdk.fingerprint.CircularFingerprinter;
25+
import com.bioinception.smsd.core.SearchEngine;
2626
import org.openscience.cdk.interfaces.IAtomContainer;
2727
import org.openscience.cdk.interfaces.IAtomContainerSet;
2828
import org.openscience.cdk.interfaces.IReaction;
@@ -34,7 +34,6 @@
3434
import static java.lang.String.valueOf;
3535
import static java.lang.System.currentTimeMillis;
3636
import static java.util.logging.Level.SEVERE;
37-
import static org.openscience.cdk.fingerprint.CircularFingerprinter.CLASS_ECFP4;
3837
import static org.openscience.cdk.geometry.GeometryUtil.has2DCoordinates;
3938
import static org.openscience.cdk.graph.ConnectivityChecker.isConnected;
4039
import static org.openscience.cdk.tools.LoggingToolFactory.createLoggingTool;
@@ -212,6 +211,9 @@ public static interface IFingerprintGenerator {
212211
*/
213212
public static class FingerprintGenerator implements IFingerprintGenerator {
214213

214+
private static final int FP_PATH_LENGTH = 7;
215+
private static final int FP_SIZE = 1024;
216+
215217
private final static ILoggingTool LOGGER
216218
= createLoggingTool(FingerprintGenerator.class);
217219

@@ -221,20 +223,13 @@ public static class FingerprintGenerator implements IFingerprintGenerator {
221223
* @return
222224
*/
223225
public static int getFingerprinterSize() {
224-
return new CircularFingerprinter(CLASS_ECFP4).getSize();
226+
return FP_SIZE;
225227
}
226228

227-
//define the FINGERPRINT_SIZE of the fingerprint
228-
//NOTE: this should be a multiple of 64 and preferably not 1024 or 2048
229-
//as for these values we often get the random numbers for one-atom or
230-
//two-atom paths the same!
231-
final CircularFingerprinter fingerprinter;
232-
233229
/**
234230
*
235231
*/
236232
public FingerprintGenerator() {
237-
fingerprinter = new CircularFingerprinter(CLASS_ECFP4);
238233
}
239234

240235
/**
@@ -255,11 +250,28 @@ public BitSet getFingerprint(IAtomContainer mol) throws CDKException {
255250
LOGGER.debug("Disconnected components needs to be layout separately");
256251
}
257252
}
258-
return fingerprinter.getBitFingerprint(mol).asBitSet();
253+
long[] fp = SearchEngine.pathFingerprint(mol, FP_PATH_LENGTH, FP_SIZE);
254+
return longArrayToBitSet(fp);
259255
}
260256

261257
}
262258

259+
/**
260+
* Convert SMSD long[] fingerprint to BitSet
261+
*/
262+
public static BitSet longArrayToBitSet(long[] fp) {
263+
BitSet bs = new BitSet(fp.length * 64);
264+
for (int i = 0; i < fp.length; i++) {
265+
long word = fp[i];
266+
for (int bit = 0; bit < 64; bit++) {
267+
if ((word & (1L << bit)) != 0) {
268+
bs.set(i * 64 + bit);
269+
}
270+
}
271+
}
272+
return bs;
273+
}
274+
263275

264276

265277
/**

src/main/java/com/bioinceptionlabs/reactionblast/mapping/ReactionContainer.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222
import java.util.Objects;
2323
import java.util.Set;
2424
import java.util.TreeMap;
25+
import com.bioinception.smsd.core.SearchEngine;
2526
import org.openscience.cdk.exception.CDKException;
26-
import org.openscience.cdk.fingerprint.Fingerprinter;
2727
import org.openscience.cdk.interfaces.IAtom;
2828
import org.openscience.cdk.interfaces.IAtomContainer;
2929
import org.openscience.cdk.interfaces.IReaction;
@@ -1144,8 +1144,8 @@ private IAtomContainer setProperty(IAtomContainer molecule) throws Exception {
11441144
Single Atom fingerprints
11451145
*/
11461146
if (fingerprint_Present_Mol.isEmpty()) {
1147-
Fingerprinter fingerprinter = new Fingerprinter();
1148-
fingerprint_Present_Mol = fingerprinter.getBitFingerprint(molecule).asBitSet();
1147+
long[] fp = SearchEngine.pathFingerprint(molecule, 7, 1024);
1148+
fingerprint_Present_Mol = com.bioinceptionlabs.reactionblast.fingerprints.ReactionFingerprinter.longArrayToBitSet(fp);
11491149
}
11501150
//Loop for Unique Mol ID Creation
11511151
if (!fingerprint_Present_Mol.isEmpty()) {

0 commit comments

Comments
 (0)