Skip to content

Commit 5a7f22f

Browse files
committed
Upgrade SMSD to 6.3.0, replace CDK fingerprints with SMSD, update README
- Bump SMSD 5.8.1 to 6.3.0, remove cdk-fingerprint dependency
- GameTheoryEngine: use SMSD.circularFingerprintFCFP with IdentityHashMap cache
- StandardizeReaction: use SMSD.circularFingerprintECFP + fingerprintTanimoto
- Add atom-balance guard to prevent filtering genuine reactants
- README: add RDT logo, update to v3.5.0, SMSD 6.3.0, 163 tests
- All 163 tests pass, no CDK fingerprint imports remain
1 parent 312fa58 commit 5a7f22f

4 files changed

Lines changed: 56 additions & 33 deletions

File tree

README.md

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
![RDT Logo](images/rdt-logo.svg)
2+
13
Introduction
24
============
35

4-
`Reaction Decoder Tool (RDT) v3.3.0`
6+
`Reaction Decoder Tool (RDT) v3.5.0`
57
--------------------------------------
68

79
**Toolkit-agnostic reaction mapping engine** with CDK adapter. Deterministic, no training data required.
@@ -109,7 +111,7 @@ The package namespace has changed from `uk.ac.ebi` to `com.bioinceptionlabs` in
109111
<!-- Old (v2.x) -->
110112
<groupId>uk.ac.ebi.rdt</groupId>
111113

112-
<!-- New (v3.3.0+) -->
114+
<!-- New (v3.5.0+) -->
113115
<groupId>com.bioinceptionlabs</groupId>
114116
```
115117

@@ -147,10 +149,10 @@ Performance
147149
|--------|-------|
148150
| Mapping speed | 3.4 reactions/sec (USPTO 50K) |
149151
| RXN coverage | 598/599 (99.8%) |
150-
| Test suite | 156 tests, 100% pass |
152+
| Test suite | 163 tests, 100% pass |
151153
| Test time | ~120s (4x faster than v2.x) |
152154
| Codebase | 68 files (reduced from 345) |
153-
| Dependencies | SMSD 5.2.1, CDK 2.12 |
155+
| Dependencies | SMSD 6.3.0, CDK 2.12 (lightweight) |
154156
| Deterministic | Yes (no ML training needed) |
155157

156158
How to Cite RDT?
@@ -183,7 +185,7 @@ Sub-commands
183185
`AAM using SMILES`
184186

185187
```
186-
java -jar rdt-3.3.0-jar-with-dependencies.jar -Q SMI -q "CC(O)CC(=O)OC(C)CC(O)=O.O[H]>>[H]OC(=O)CC(C)O.CC(O)CC(O)=O" -g -c -j AAM -f TEXT
188+
java -jar rdt-3.5.0-jar-with-dependencies.jar -Q SMI -q "CC(O)CC(=O)OC(C)CC(O)=O.O[H]>>[H]OC(=O)CC(C)O.CC(O)CC(O)=O" -g -c -j AAM -f TEXT
187189
```
188190

189191
`Perform AAM` for Transporters
@@ -192,14 +194,14 @@ Sub-commands
192194
`AAM using SMILES` (accept mapping with no bond changes -b)
193195

194196
```
195-
java -jar rdt-3.3.0-jar-with-dependencies.jar -Q SMI -q "O=C(O)C(N)CC(=O)N.O=C(O)C(N)CS>>C(N)(CC(=O)N)C(=O)O.O=C(O)C(N)CS" -b -g -c -j AAM -f TEXT
197+
java -jar rdt-3.5.0-jar-with-dependencies.jar -Q SMI -q "O=C(O)C(N)CC(=O)N.O=C(O)C(N)CS>>C(N)(CC(=O)N)C(=O)O.O=C(O)C(N)CS" -b -g -c -j AAM -f TEXT
196198
```
197199

198200
`Annotate Reaction using SMILES`
199201
---------------------------------
200202

201203
```
202-
java -jar rdt-3.3.0-jar-with-dependencies.jar -Q SMI -q "CC(O)CC(=O)OC(C)CC(O)=O.O[H]>>[H]OC(=O)CC(C)O.CC(O)CC(O)=O" -g -c -j ANNOTATE -f XML
204+
java -jar rdt-3.5.0-jar-with-dependencies.jar -Q SMI -q "CC(O)CC(=O)OC(C)CC(O)=O.O[H]>>[H]OC(=O)CC(C)O.CC(O)CC(O)=O" -g -c -j ANNOTATE -f XML
203205
```
204206

205207

@@ -209,12 +211,12 @@ Sub-commands
209211
`Compare Reactions using SMILES with precomputed AAM mappings`
210212

211213
```
212-
java -jar rdt-3.3.0-jar-with-dependencies.jar -Q RXN -q example/ReactionDecoder_mapped.rxn -T RXN -t example/ReactionDecoder_mapped.rxn -j COMPARE -f BOTH -u
214+
java -jar rdt-3.5.0-jar-with-dependencies.jar -Q RXN -q example/ReactionDecoder_mapped.rxn -T RXN -t example/ReactionDecoder_mapped.rxn -j COMPARE -f BOTH -u
213215
```
214216

215217

216218
`Compare Reactions using RXN files`
217219

218220
```
219-
java -jar rdt-3.3.0-jar-with-dependencies.jar -Q RXN -q example/ReactionDecoder_mapped.rxn -T RXN -t example/ReactionDecoder_mapped.rxn -j COMPARE -f BOTH
221+
java -jar rdt-3.5.0-jar-with-dependencies.jar -Q RXN -q example/ReactionDecoder_mapped.rxn -T RXN -t example/ReactionDecoder_mapped.rxn -j COMPARE -f BOTH
220222
```

pom.xml

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -181,16 +181,11 @@
181181
<artifactId>cdk-inchi</artifactId>
182182
<version>${cdk.version}</version>
183183
</dependency>
184-
<dependency>
185-
<groupId>org.openscience.cdk</groupId>
186-
<artifactId>cdk-fingerprint</artifactId>
187-
<version>${cdk.version}</version>
188-
</dependency>
189184

190185
<dependency>
191186
<groupId>com.bioinceptionlabs</groupId>
192187
<artifactId>smsd</artifactId>
193-
<version>5.8.1</version>
188+
<version>6.3.0</version>
194189
</dependency>
195190

196191
<!-- https://mvnrepository.com/artifact/commons-cli/commons-cli -->

src/main/java/com/bioinceptionlabs/reactionblast/mapping/algorithm/GameTheoryEngine.java

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,7 @@
4747

4848
import org.openscience.cdk.PseudoAtom;
4949
import org.openscience.cdk.exception.CDKException;
50-
import org.openscience.cdk.fingerprint.CircularFingerprinter;
51-
import org.openscience.cdk.fingerprint.IBitFingerprint;
50+
import com.bioinception.smsd.core.SMSD;
5251
import org.openscience.cdk.graph.CycleFinder;
5352
import org.openscience.cdk.graph.Cycles;
5453
import org.openscience.cdk.interfaces.IAtom;
@@ -138,6 +137,9 @@ public abstract class GameTheoryEngine extends Debugger implements IGameTheory,
138137
= createLoggingTool(GameTheoryEngine.class);
139138
private static final long serialVersionUID = 1698688633678282L;
140139

140+
private final transient java.util.IdentityHashMap<IAtomContainer, int[]> circularFPCache
141+
= new java.util.IdentityHashMap<>();
142+
141143
// ---- BaseGameTheory methods inlined into outer class ----
142144

143145
protected static boolean isPseudoAtoms(IAtomContainer atomContainer) {
@@ -500,10 +502,19 @@ String generateUniqueKey(
500502
}
501503

502504
private int[] getCircularFP(IAtomContainer mol) throws CDKException {
503-
CircularFingerprinter circularFingerprinter = new CircularFingerprinter(6, 1024);
504-
circularFingerprinter.setPerceiveStereo(true);
505-
IBitFingerprint bitFingerprint = circularFingerprinter.getBitFingerprint(mol);
506-
return bitFingerprint.getSetbits();
505+
int[] cached = circularFPCache.get(mol);
506+
if (cached != null) {
507+
return cached;
508+
}
509+
long[] fp = SMSD.circularFingerprintFCFP(mol, 1, 1024);
510+
BitSet bs = SMSD.toBitSet(fp);
511+
int[] bits = new int[bs.cardinality()];
512+
int idx = 0;
513+
for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) {
514+
bits[idx++] = i;
515+
}
516+
circularFPCache.put(mol, bits);
517+
return bits;
507518
}
508519

509520
MCSSolution copyOldSolutionToNew(int queryPosition, int targetPosition,

src/main/java/com/bioinceptionlabs/reactionblast/tools/StandardizeReaction.java

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,11 @@
2828
import java.util.Set;
2929
import java.util.logging.Level;
3030

31-
import org.openscience.cdk.fingerprint.CircularFingerprinter;
31+
import com.bioinception.smsd.core.SMSD;
3232
import org.openscience.cdk.interfaces.IAtom;
3333
import org.openscience.cdk.interfaces.IAtomContainer;
3434
import org.openscience.cdk.interfaces.IAtomContainerSet;
3535
import org.openscience.cdk.interfaces.IReaction;
36-
import org.openscience.cdk.similarity.Tanimoto;
3736
import org.openscience.cdk.tools.ILoggingTool;
3837
import static org.openscience.cdk.tools.LoggingToolFactory.createLoggingTool;
3938
import com.bioinceptionlabs.reactionblast.mapping.ReactionContainer.CDKReactionBuilder;
@@ -172,14 +171,13 @@ public IReaction filterReagents(IReaction reaction) {
172171
}
173172

174173
try {
175-
CircularFingerprinter fp = new CircularFingerprinter();
176174
IAtomContainerSet products = reaction.getProducts();
177175

178-
// Pre-compute product fingerprints
179-
List<org.openscience.cdk.fingerprint.IBitFingerprint> productFPs = new ArrayList<>();
176+
// Pre-compute product fingerprints using SMSD ECFP4 (radius=2)
177+
List<long[]> productFPs = new ArrayList<>();
180178
for (IAtomContainer prod : products.atomContainers()) {
181179
try {
182-
productFPs.add(fp.getBitFingerprint(prod));
180+
productFPs.add(SMSD.circularFingerprintECFP(prod, 2, 1024));
183181
} catch (Exception e) {
184182
productFPs.add(null);
185183
}
@@ -200,10 +198,14 @@ public IReaction filterReagents(IReaction reaction) {
200198
boolean isReagent = false;
201199
String reason = "";
202200

201+
// Check if this reactant is needed for atom balance
202+
boolean neededForBalance = isNeededForBalance(
203+
reactant, reaction.getReactants(), productAtomCounts);
204+
203205
try {
204206
// Check 1: Known solvent/reagent by canonical SMILES
205207
String canSmiles = smiGen.create(reactant);
206-
if (KNOWN_REAGENT_SMILES.contains(canSmiles)) {
208+
if (!neededForBalance && KNOWN_REAGENT_SMILES.contains(canSmiles)) {
207209
isReagent = true;
208210
reason = "known reagent/solvent: " + canSmiles;
209211
}
@@ -220,14 +222,13 @@ public IReaction filterReagents(IReaction reaction) {
220222
}
221223

222224
// Check 3: Tanimoto fingerprint similarity
223-
if (!isReagent) {
224-
org.openscience.cdk.fingerprint.IBitFingerprint reactantFP =
225-
fp.getBitFingerprint(reactant);
225+
if (!isReagent && !neededForBalance) {
226+
long[] reactantFP = SMSD.circularFingerprintECFP(reactant, 2, 1024);
226227

227228
double maxSim = 0.0;
228-
for (org.openscience.cdk.fingerprint.IBitFingerprint prodFP : productFPs) {
229+
for (long[] prodFP : productFPs) {
229230
if (prodFP != null) {
230-
double sim = Tanimoto.calculate(reactantFP, prodFP);
231+
double sim = SMSD.fingerprintTanimoto(reactantFP, prodFP);
231232
maxSim = Math.max(maxSim, sim);
232233
}
233234
}
@@ -310,6 +311,20 @@ public IReaction filterReagents(IReaction reaction) {
310311
}
311312
}
312313

314+
private boolean isNeededForBalance(IAtomContainer candidate,
315+
IAtomContainerSet allReactants, Map<String, Integer> productAtomCounts) {
316+
Map<String, Integer> remaining = new LinkedHashMap<>(countAtoms(allReactants));
317+
for (IAtom atom : candidate.atoms()) {
318+
remaining.merge(atom.getSymbol(), -1, Integer::sum);
319+
}
320+
for (Map.Entry<String, Integer> entry : productAtomCounts.entrySet()) {
321+
if (remaining.getOrDefault(entry.getKey(), 0) < entry.getValue()) {
322+
return true;
323+
}
324+
}
325+
return false;
326+
}
327+
313328
private Map<String, Integer> countAtoms(IAtomContainerSet molSet) {
314329
Map<String, Integer> counts = new LinkedHashMap<>();
315330
for (IAtomContainer mol : molSet.atomContainers()) {

0 commit comments

Comments
 (0)