Skip to content

Commit 5aa0079

Browse files
committed
Add mapping diagnostics benchmark
1 parent c2108c1 commit 5aa0079

7 files changed

Lines changed: 546 additions & 13 deletions

File tree

src/main/java/com/bioinceptionlabs/reactionblast/mapping/CallableAtomMappingTool.java

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,8 @@ private void generateAtomAtomMapping(
108108
return;
109109
}
110110

111+
MappingDiagnostics.resetReaction(standardizedReaction.getID());
112+
111113
if (isIdentityReaction(standardizedReaction)) {
112114
try {
113115
Reactor minResult = new MappingThread(

src/main/java/com/bioinceptionlabs/reactionblast/mapping/GraphMatcher.java

Lines changed: 54 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -205,6 +205,9 @@ private static void harmonizeForSmsd(IAtomContainer container) {
205205
public static Collection<MCSSolution> matcher(Holder mh) throws Exception {
206206
ExecutorService executor = null;
207207
Collection<MCSSolution> mcsSolutions = new ArrayList<>();
208+
long matcherStart = currentTimeMillis();
209+
String reactionId = mh.getReactionID();
210+
String algorithmName = mh.getTheory() == null ? "UNKNOWN" : mh.getTheory().name();
208211

209212
LOGGER.debug("Matcher Class for " + mh.getTheory());
210213
Set<Combination> jobReplicatorList = new TreeSet<>();
@@ -350,6 +353,7 @@ public static Collection<MCSSolution> matcher(Holder mh) throws Exception {
350353
executor = Executors.newFixedThreadPool(threadsAvailable);
351354
CompletionService<MCSSolution> callablesQueue = new ExecutorCompletionService<>(executor);
352355

356+
List<PairJob> jobsToRun = new ArrayList<>();
353357
List<MCSThread> listOfJobs = new ArrayList<>();
354358
Map<Combination, PairJob> pairJobsByRepresentative = new HashMap<>();
355359

@@ -416,7 +420,30 @@ public static Collection<MCSSolution> matcher(Holder mh) throws Exception {
416420
continue;
417421
}
418422

419-
// Clone molecules for thread safety — CDK IAtomContainer is mutable and not thread-safe
423+
jobsToRun.add(pairJob);
424+
}
425+
426+
if (skippedIdentity + skippedRatio + skippedTanimoto > 0) {
427+
LOGGER.debug("Pre-filter: skipped " + skippedIdentity + " identity, "
428+
+ skippedRatio + " ratio, " + skippedTanimoto + " tanimoto pairs");
429+
}
430+
431+
int invocationIndex = MappingDiagnostics.recordMatcherInvocation(
432+
reactionId,
433+
algorithmName,
434+
jobReplicatorList.size(),
435+
pairJobs.size(),
436+
skippedIdentity,
437+
skippedRatio,
438+
skippedTanimoto,
439+
jobsToRun.size());
440+
441+
for (PairJob pairJob : jobsToRun) {
442+
Combination representative = pairJob.representative;
443+
int substrateIndex = representative.getRowIndex();
444+
int productIndex = representative.getColIndex();
445+
IAtomContainer educt = reactionStructureInformation.getEduct(substrateIndex);
446+
IAtomContainer product = reactionStructureInformation.getProduct(productIndex);
420447
IAtomContainer eductClone;
421448
IAtomContainer productClone;
422449
try {
@@ -427,18 +454,14 @@ public static Collection<MCSSolution> matcher(Holder mh) throws Exception {
427454
productClone = product;
428455
}
429456
MCSThread mcsThread = new MCSThread(mh.getTheory(),
430-
substrateIndex, productIndex, eductClone, productClone);
457+
substrateIndex, productIndex, eductClone, productClone,
458+
reactionId, algorithmName, invocationIndex);
431459
mcsThread.setHasPerfectRings(pairJob.hasPerfectRings);
432460
mcsThread.setEductRingCount(pairJob.numberOfCyclesEduct);
433461
mcsThread.setProductRingCount(pairJob.numberOfCyclesProduct);
434462
listOfJobs.add(mcsThread);
435463
}
436464

437-
if (skippedIdentity + skippedRatio + skippedTanimoto > 0) {
438-
LOGGER.debug("Pre-filter: skipped " + skippedIdentity + " identity, "
439-
+ skippedRatio + " ratio, " + skippedTanimoto + " tanimoto pairs");
440-
}
441-
442465
if (listOfJobs.size() > LARGE_JOB_THRESHOLD) {
443466
LOGGER.warn("Large job: " + listOfJobs.size() + " MCS pairs to compute");
444467
}
@@ -469,6 +492,7 @@ public static Collection<MCSSolution> matcher(Holder mh) throws Exception {
469492
executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
470493

471494
LOGGER.debug("==Gathering MCS solution from the Thread==");
495+
long replayedMappings = 0;
472496
threadedUniqueMCSSolutions.stream().filter((mcs) -> !(mcs == null)).forEach((MCSSolution mcs) -> {
473497
Combination representative = new Combination(
474498
mcs.getQueryPosition(),
@@ -490,6 +514,13 @@ public static Collection<MCSSolution> matcher(Holder mh) throws Exception {
490514
mcsSolutions.add(replicatedMCS);
491515
}
492516
});
517+
replayedMappings = mcsSolutions.size();
518+
MappingDiagnostics.recordMatcherCompletion(
519+
reactionId,
520+
algorithmName,
521+
invocationIndex,
522+
replayedMappings,
523+
currentTimeMillis() - matcherStart);
493524
jobReplicatorList.clear();
494525

495526
} catch (Exception ex) {
@@ -989,6 +1020,9 @@ public static class MCSThread implements Callable<MCSSolution> {
9891020
*
9901021
*/
9911022
protected final IMappingAlgorithm theory;
1023+
private final String reactionId;
1024+
private final String algorithmName;
1025+
private final int invocationIndex;
9921026

9931027
/**
9941028
*
@@ -1012,13 +1046,17 @@ public static class MCSThread implements Callable<MCSSolution> {
10121046
* @throws org.openscience.cdk.exception.CDKException
10131047
*/
10141048
MCSThread(IMappingAlgorithm theory, int queryPosition, int targetPosition,
1015-
IAtomContainer educt, IAtomContainer product)
1049+
IAtomContainer educt, IAtomContainer product,
1050+
String reactionId, String algorithmName, int invocationIndex)
10161051
throws CloneNotSupportedException, CDKException {
10171052
this.compound1 = getNewContainerWithIDs(educt);
10181053
this.compound2 = getNewContainerWithIDs(product);
10191054
this.queryPosition = queryPosition;
10201055
this.targetPosition = targetPosition;
10211056
this.theory = theory;
1057+
this.reactionId = reactionId;
1058+
this.algorithmName = algorithmName;
1059+
this.invocationIndex = invocationIndex;
10221060
this.numberOfCyclesEduct = 0;
10231061
this.numberOfCyclesProduct = 0;
10241062
}
@@ -1069,6 +1107,7 @@ public MCSSolution call() throws Exception {
10691107
am = AtomBondMatcher.atomMatcher(true, isHasPerfectRings());
10701108
bm = AtomBondMatcher.bondMatcher(false, isHasPerfectRings());
10711109

1110+
MappingDiagnostics.recordSubstructureSearch(reactionId, algorithmName, invocationIndex);
10721111
substructure = MAPPING_ENGINE.findSubstructure(ac1, ac2, am, bm, true,
10731112
SINGLE_SUBGRAPH_MATCH, SUBGRAPH_TIMEOUT_MS);
10741113

@@ -1077,13 +1116,15 @@ public MCSSolution call() throws Exception {
10771116
bm = AtomBondMatcher.bondMatcher(false, isHasPerfectRings());
10781117

10791118
LOGGER.debug("---1.3---");
1119+
MappingDiagnostics.recordSubstructureSearch(reactionId, algorithmName, invocationIndex);
10801120
substructure = MAPPING_ENGINE.findSubstructure(ac1, ac2,
10811121
am, bm, true, SINGLE_SUBGRAPH_MATCH, SUBGRAPH_TIMEOUT_MS);
10821122
} else if (moleculeConnected && !substructure.isSubgraph()) {
10831123
am = AtomBondMatcher.atomMatcher(false, false);
10841124
bm = AtomBondMatcher.bondMatcher(false, isHasPerfectRings());
10851125

10861126
LOGGER.debug("---1.2---");
1127+
MappingDiagnostics.recordSubstructureSearch(reactionId, algorithmName, invocationIndex);
10871128
substructure = MAPPING_ENGINE.findSubstructure(ac1, ac2, am, bm, true,
10881129
SINGLE_SUBGRAPH_MATCH, SUBGRAPH_TIMEOUT_MS);
10891130
}
@@ -1119,6 +1160,7 @@ public MCSSolution call() throws Exception {
11191160
am = AtomBondMatcher.atomMatcher(true, isHasPerfectRings());
11201161
bm = AtomBondMatcher.bondMatcher(false, isHasPerfectRings());
11211162

1163+
MappingDiagnostics.recordSubstructureSearch(reactionId, algorithmName, invocationIndex);
11221164
substructure = MAPPING_ENGINE.findSubstructure(ac2, ac1, am, bm, true,
11231165
SINGLE_SUBGRAPH_MATCH, SUBGRAPH_TIMEOUT_MS);
11241166

@@ -1127,13 +1169,15 @@ public MCSSolution call() throws Exception {
11271169
bm = AtomBondMatcher.bondMatcher(false, isHasPerfectRings());
11281170

11291171
LOGGER.debug("---2.3---");
1172+
MappingDiagnostics.recordSubstructureSearch(reactionId, algorithmName, invocationIndex);
11301173
substructure = MAPPING_ENGINE.findSubstructure(ac2, ac1, am, bm, true,
11311174
SINGLE_SUBGRAPH_MATCH, SUBGRAPH_TIMEOUT_MS);
11321175
} else if (moleculeConnected && !substructure.isSubgraph()) {
11331176
am = AtomBondMatcher.atomMatcher(false, false);
11341177
bm = AtomBondMatcher.bondMatcher(false, isHasPerfectRings());
11351178

11361179
LOGGER.debug("---2.2---");
1180+
MappingDiagnostics.recordSubstructureSearch(reactionId, algorithmName, invocationIndex);
11371181
substructure = MAPPING_ENGINE.findSubstructure(ac2, ac1, am, bm, true,
11381182
SINGLE_SUBGRAPH_MATCH, SUBGRAPH_TIMEOUT_MS);
11391183
}
@@ -1312,6 +1356,7 @@ MCSSolution mcs() throws CDKException, CloneNotSupportedException {
13121356
key = generateUniqueKey(settings);
13131357
if (ThreadSafeCache.getInstance().containsKey(key)) {
13141358
LOGGER.debug("===={Aladdin} Mapping {Gini}====");
1359+
MappingDiagnostics.recordMcsCacheHit(reactionId, algorithmName, invocationIndex);
13151360
MCSSolution solution = (MCSSolution) ThreadSafeCache.getInstance().get(key);
13161361
mcs = copyOldSolutionToNew(
13171362
getQueryPosition(), getTargetPosition(),
@@ -1324,6 +1369,7 @@ MCSSolution mcs() throws CDKException, CloneNotSupportedException {
13241369
mcsOptions.connectedOnly = isMoleculeConnected(ac1, ac2);
13251370
mcsOptions.disconnectedMCS = !mcsOptions.connectedOnly;
13261371
mcsOptions.maximizeBonds = settings.bondMatch;
1372+
MappingDiagnostics.recordActualMcsSearch(reactionId, algorithmName, invocationIndex);
13271373
isomorphism = MAPPING_ENGINE.findMcs(ac1, ac2, Algorithm.VFLibMCS, am, bm, mcsOptions);
13281374
mcs = addMCSSolution(key, ThreadSafeCache.getInstance(), isomorphism);
13291375
}

0 commit comments

Comments
 (0)