Skip to content

Commit 95b0ad5

Browse files
author
Shangmin Dou
committed
<fix>[core]: redesign StringSimilarity to match fmt template first
Redesign findSimilar() with a three-phase strategy to prevent performance degradation when operr() format args contain very long strings (e.g., serialized ErrorCodeList or HTML bodies): Phase 1: regex match against raw fmt template (always short). Phase 2: fallback to formatted string only if Phase 1 misses and length <= maxElaborationRegex (8192). Phase 3: distance match always uses raw fmt template. Also add length guard in findMostSimilarRegex() and remove redundant String.format length check in Platform.elaborate(). Root cause: StringSimilarity.findSimilar() was running 199 regex patterns via ReTree against multi-KB formatted strings, causing 7+ second latency in error code creation hot paths. Resolves: ZSTAC-72079 Change-Id: I38b98a762deb436da31e4884da05d12b38b98a76
1 parent c05b8f7 commit 95b0ad5

2 files changed

Lines changed: 41 additions & 14 deletions

File tree

core/src/main/java/org/zstack/core/Platform.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -938,10 +938,6 @@ public static boolean killProcess(int pid, Integer timeout) {
938938

939939
private static ErrorCodeElaboration elaborate(String fmt, Object...args) {
940940
try {
941-
if (String.format(fmt, args).length() > StringSimilarity.maxElaborationRegex) {
942-
return null;
943-
}
944-
945941
ErrorCodeElaboration elaboration = StringSimilarity.findSimilar(fmt, args);
946942
if (elaboration == null) {
947943
return null;

utils/src/main/java/org/zstack/utils/string/StringSimilarity.java

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,15 @@ private static void logSearchSpend(String sub, long start, boolean found) {
287287
/**
288288
* find the most similar error code elaboration for the given error message.
289289
*
290+
* The method uses a three-phase strategy to avoid performance degradation
291+
* when format args produce very long strings (e.g., serialized error chains
292+
* or HTML response bodies):
293+
*
294+
* Phase 1: Try regex matching with the formatted string (length-guarded).
295+
* Phase 2: If Phase 1 misses (or formatted string too long), fallback
296+
* to the raw fmt template for regex matching.
297+
* Phase 3: Distance matching always uses the raw fmt template.
298+
*
290299
* @param sub error message or error message fmt
291300
* @param args arguments
292301
* @return the most similar error code elaboration
@@ -311,23 +320,41 @@ public static ErrorCodeElaboration findSimilar(String sub, Object...args) {
311320
errors.remove(sub);
312321
}
313322

314-
if (args != null && missed.get(String.format(sub, args)) != null) {
315-
logSearchSpend(sub, start, false);
316-
return null;
317-
} else if (missed.get(sub) != null) {
323+
// check missed cache for both fmt template and formatted string
324+
if (missed.get(sub) != null) {
318325
logSearchSpend(sub, start, false);
319326
return null;
320327
}
328+
if (args != null) {
329+
try {
330+
String formatted = String.format(sub, args);
331+
if (missed.get(formatted) != null) {
332+
logSearchSpend(sub, start, false);
333+
return null;
334+
}
335+
} catch (Exception e) {
336+
logger.trace(String.format("failed to format elaboration key: %s", e.getMessage()));
337+
}
338+
}
321339

322-
try {
323-
logger.trace(String.format("start to search elaboration for: %s", String.format(sub, args)));
324-
err = findMostSimilarRegex(String.format(sub, args));
325-
} catch (Exception e) {
326-
logger.trace(String.format("start search elaboration for: %s", sub));
340+
// Phase 1: try regex matching with formatted string (guarded by length limit)
341+
if (args != null && args.length > 0) {
342+
try {
343+
String formatted = String.format(sub, args);
344+
if (formatted.length() <= maxElaborationRegex) {
345+
err = findMostSimilarRegex(formatted);
346+
}
347+
} catch (Exception e) {
348+
logger.trace(String.format("failed to format for regex matching: %s", e.getMessage()));
349+
}
350+
}
351+
352+
// Phase 2: if formatted string missed or was too long, fallback to raw fmt template
353+
if (err == null) {
327354
err = findMostSimilarRegex(sub);
328355
}
329356

330-
// find by distance is not reliable disable it for now
357+
// Phase 3: distance matching uses the raw fmt template (always short)
331358
if (err == null) {
332359
err = findSimilarDistance(sub);
333360
}
@@ -355,6 +382,10 @@ private static boolean verifyElaboration(ErrorCodeElaboration elaboration, Strin
355382

356383
// better precision, worse performance
357384
private static ErrorCodeElaboration findMostSimilarRegex(String sub) {
385+
if (sub.length() > maxElaborationRegex) {
386+
return null;
387+
}
388+
358389
if (!isRegexMatchedByRetrees(sub)) {
359390
return null;
360391
}

0 commit comments

Comments
 (0)