-
Notifications
You must be signed in to change notification settings - Fork 32
Add tests for ModelCapabilities InferFromName and fuzzy prefix matching #775
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2398,6 +2398,199 @@ public void GetStrengths_ReturnsDescription() | |
| Assert.NotEqual("Unknown model", strengths); | ||
| Assert.Contains("reasoning", strengths, StringComparison.OrdinalIgnoreCase); | ||
| } | ||
|
|
||
| // --- InferFromName: family branches --- | ||
|
|
||
| [Theory] | ||
| [InlineData("claude-opus-99", ModelCapability.ReasoningExpert | ModelCapability.CodeExpert | ModelCapability.ToolUse)] | ||
| [InlineData("my-opus-model", ModelCapability.ReasoningExpert | ModelCapability.CodeExpert | ModelCapability.ToolUse)] | ||
| public void InferFromName_OpusFamily_ReturnsReasoningExpertCodeExpertToolUse(string slug, ModelCapability expected) | ||
| { | ||
| var caps = ModelCapabilities.InferFromName(slug); | ||
| Assert.Equal(expected, caps); | ||
| } | ||
|
|
||
| [Theory] | ||
| [InlineData("claude-sonnet-5", ModelCapability.CodeExpert | ModelCapability.ToolUse | ModelCapability.Fast)] | ||
| [InlineData("sonnet-next", ModelCapability.CodeExpert | ModelCapability.ToolUse | ModelCapability.Fast)] | ||
| public void InferFromName_SonnetFamily_ReturnsCodeExpertToolUseFast(string slug, ModelCapability expected) | ||
| { | ||
| var caps = ModelCapabilities.InferFromName(slug); | ||
| Assert.Equal(expected, caps); | ||
| } | ||
|
|
||
| [Theory] | ||
| [InlineData("claude-haiku-5", ModelCapability.Fast | ModelCapability.CostEfficient)] | ||
| [InlineData("haiku-lite", ModelCapability.Fast | ModelCapability.CostEfficient)] | ||
| public void InferFromName_HaikuFamily_ReturnsFastCostEfficient(string slug, ModelCapability expected) | ||
| { | ||
| var caps = ModelCapabilities.InferFromName(slug); | ||
| Assert.Equal(expected, caps); | ||
| } | ||
|
|
||
| [Theory] | ||
| [InlineData("gemini-4-pro")] | ||
| [InlineData("gemini-flash")] | ||
| public void InferFromName_GeminiFamily_ReturnsReasoningExpertLargeContextVision(string slug) | ||
| { | ||
| var caps = ModelCapabilities.InferFromName(slug); | ||
| // Core gemini-family flags must be present | ||
| Assert.True(caps.HasFlag(ModelCapability.ReasoningExpert)); | ||
| Assert.True(caps.HasFlag(ModelCapability.LargeContext)); | ||
| Assert.True(caps.HasFlag(ModelCapability.Vision)); | ||
| // Note: "gemini" contains "mini" as a substring, so the mini-variant logic also fires, | ||
| // adding Fast|CostEfficient. This is expected production code behaviour. | ||
| Assert.True(caps.HasFlag(ModelCapability.Fast)); | ||
| Assert.True(caps.HasFlag(ModelCapability.CostEfficient)); | ||
|
Comment on lines
+2441
to
+2444
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟡 MODERATE — Test locks in a substring-collision defect as "expected behaviour"
The test comment calls this "expected production code behaviour," which effectively approves the defect and will prevent future maintainers from recognising it as a bug. Any novel Gemini model inferred from its name will be misclassified, potentially skewing role-assignment logic in Suggestion: Reword the test comment to acknowledge this as a known defect (e.g., Flagged by: 3/3 reviewers |
||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟡 Test locks in a production bug — The production code at This test asserts those incorrect flags and documents it as "expected production code behaviour", which locks in the bug — any future fix to the variant matching would break these assertions. Concrete impact: Suggested fix — in if (lower.Contains("mini")) caps |= ...to: if (lower.Contains("-mini")) caps |= ...All real "mini" variant slugs use a |
||
|
|
||
| [Fact] | ||
| public void InferFromName_CodexVariant_AddsCodeExpert() | ||
| { | ||
|
Comment on lines
+2444
to
+2449
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟢 MINOR · 2/3 reviewers Enshrining a production quirk as required test behavior. These assertions lock in the Suggestion: Consider splitting the gemini assertions — keep |
||
| var caps = ModelCapabilities.InferFromName("new-model-codex"); | ||
| Assert.True(caps.HasFlag(ModelCapability.CodeExpert)); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟢 MINOR — Variant-only tests use These inputs ( Suggestion: Use Assert.Equal(ModelCapability.CodeExpert, caps);Same for the mini and max variant tests. |
||
| } | ||
|
|
||
| [Fact] | ||
| public void InferFromName_MiniVariant_AddsFastCostEfficient() | ||
| { | ||
| var caps = ModelCapabilities.InferFromName("new-model-mini"); | ||
| Assert.True(caps.HasFlag(ModelCapability.Fast)); | ||
| Assert.True(caps.HasFlag(ModelCapability.CostEfficient)); | ||
| } | ||
|
|
||
| [Fact] | ||
| public void InferFromName_MaxVariant_AddsReasoningExpert() | ||
| { | ||
| var caps = ModelCapabilities.InferFromName("new-model-max"); | ||
| Assert.True(caps.HasFlag(ModelCapability.ReasoningExpert)); | ||
| } | ||
|
|
||
| [Fact] | ||
| public void InferFromName_SonnetCodexCombined_HasBothFamilyAndVariantFlags() | ||
| { | ||
| // sonnet family gives CodeExpert|ToolUse|Fast; codex adds CodeExpert (idempotent via flags) | ||
| var caps = ModelCapabilities.InferFromName("sonnet-codex-experimental"); | ||
| Assert.True(caps.HasFlag(ModelCapability.CodeExpert)); | ||
| Assert.True(caps.HasFlag(ModelCapability.ToolUse)); | ||
| Assert.True(caps.HasFlag(ModelCapability.Fast)); | ||
| } | ||
|
|
||
| [Fact] | ||
|
Comment on lines
+2471
to
+2479
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟡 Tautological test — codex branch is unobservable The sonnet family already provides Concrete failing scenario: Delete line 79 ( Suggested fix: Use a family that does NOT include [Fact]
public void InferFromName_HaikuCodexCombined_AddsBothFamilyAndVariantFlags()
{
// haiku → Fast|CostEfficient; codex → |= CodeExpert (observable addition)
var caps = ModelCapabilities.InferFromName("haiku-codex-v2");
Assert.True(caps.HasFlag(ModelCapability.Fast));
Assert.True(caps.HasFlag(ModelCapability.CostEfficient));
Assert.True(caps.HasFlag(ModelCapability.CodeExpert)); // fails if codex branch removed
} |
||
| public void InferFromName_CompletelyUnknown_ReturnsNone() | ||
| { | ||
| var caps = ModelCapabilities.InferFromName("totally-unknown-xyz"); | ||
| Assert.Equal(ModelCapability.None, caps); | ||
| } | ||
|
|
||
| // --- GetCapabilities: fuzzy prefix matching --- | ||
|
|
||
| [Theory] | ||
| [InlineData("claude-opus-4.6-1m")] | ||
| [InlineData("claude-opus-4.6-preview")] | ||
| [InlineData("claude-opus-4.6-extended")] | ||
| public void GetCapabilities_FuzzyPrefix_OpusVariant_ReturnsOpusFlags(string slug) | ||
| { | ||
| var caps = ModelCapabilities.GetCapabilities(slug); | ||
| // claude-opus-4.6 registry entry: ReasoningExpert|CodeExpert|ToolUse|LargeContext | ||
| Assert.True(caps.HasFlag(ModelCapability.ReasoningExpert)); | ||
|
Comment on lines
+2492
to
+2496
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟢 MINOR — Fuzzy-prefix tests use All three fuzzy-prefix tests ( Since fuzzy-prefix resolution is expected to mirror a specific registry entry exactly, Note: |
||
| Assert.True(caps.HasFlag(ModelCapability.CodeExpert)); | ||
| Assert.True(caps.HasFlag(ModelCapability.ToolUse)); | ||
| Assert.True(caps.HasFlag(ModelCapability.LargeContext)); | ||
| } | ||
|
|
||
| [Theory] | ||
| [InlineData("claude-sonnet-4.5-20250514")] | ||
| [InlineData("claude-sonnet-4.5-preview")] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟡 MODERATE — Non-deterministic fuzzy match:
The test passes today only because both keys share identical flags ( The same ambiguity affects Fix: Use a slug that unambiguously matches exactly one registry key, or add an assertion that verifies the exact matched key. |
||
| public void GetCapabilities_FuzzyPrefix_SonnetVariant_ReturnsSonnetFlags(string slug) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟢 MINOR — Fuzzy-prefix test relies on Dictionary iteration order
Suggestion: Either use test inputs that unambiguously match a single registry key, or add a comment documenting that correctness depends on both keys sharing the same flags. Flagged by: 2/3 reviewers |
||
| { | ||
| var caps = ModelCapabilities.GetCapabilities(slug); | ||
| // claude-sonnet-4.5 registry entry: CodeExpert|ToolUse|Fast | ||
| Assert.True(caps.HasFlag(ModelCapability.CodeExpert)); | ||
| Assert.True(caps.HasFlag(ModelCapability.ToolUse)); | ||
| Assert.True(caps.HasFlag(ModelCapability.Fast)); | ||
| } | ||
|
Comment on lines
+2502
to
+2512
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟢 MINOR — Fuzzy-match test relies on non-deterministic Dictionary iteration order
Both entries have identical caps today ( Suggestion: Add a comment noting the assumption: // Both claude-sonnet-4.5 and claude-sonnet-4 share identical caps today;
// if they diverge, fuzzy matching will need longest-prefix-wins logic. |
||
|
|
||
| [Fact] | ||
| public void GetCapabilities_FuzzyPrefix_GptMiniVariant_ReturnsGpt4Flags() | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟢 MINOR — Misleading test method name This tests
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟢 MINOR — Misleading test name
Suggestion: Rename to Flagged by: 2/3 reviewers
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟢 MINOR — Misleading test name
Suggestion: Rename to something like |
||
| { | ||
| // "gpt-4.1" has Fast|CostEfficient|ToolUse; "gpt-4.1-preview" unambiguously fuzzy-matches it | ||
|
Comment on lines
+2510
to
+2517
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟡 False-positive test — cannot detect fuzzy-matching regressions Both sonnet registry entries ( This means if the entire fuzzy-prefix loop in Concrete failing scenario: Delete the Suggested fix: Replace with a model whose registry entry diverges from [Fact]
public void GetCapabilities_FuzzyPrefix_HaikuVariant_ReturnsRegistryFlags()
{
var caps = ModelCapabilities.GetCapabilities("claude-haiku-4.5-fast");
// Registry: Fast|CostEfficient|ToolUse; InferFromName: Fast|CostEfficient (no ToolUse)
Assert.True(caps.HasFlag(ModelCapability.ToolUse)); // only passes via fuzzy
} |
||
| // (no shorter registry key matches "gpt-4.1-preview") | ||
| var caps = ModelCapabilities.GetCapabilities("gpt-4.1-preview"); | ||
| Assert.True(caps.HasFlag(ModelCapability.Fast)); | ||
| Assert.True(caps.HasFlag(ModelCapability.CostEfficient)); | ||
| Assert.True(caps.HasFlag(ModelCapability.ToolUse)); | ||
| } | ||
|
|
||
| [Fact] | ||
| public void GetCapabilities_FuzzyPrefix_MatchesSameAsExact() | ||
| { | ||
| // Fuzzy-matched variant should return identical flags to the exact registry entry | ||
| var exact = ModelCapabilities.GetCapabilities("claude-opus-4.6"); | ||
| var fuzzy = ModelCapabilities.GetCapabilities("claude-opus-4.6-1m"); | ||
| Assert.Equal(exact, fuzzy); | ||
| } | ||
|
|
||
| // --- GetStrengths: all paths --- | ||
|
|
||
| [Theory] | ||
| [InlineData("gpt-5")] | ||
| [InlineData("claude-opus-4.5")] | ||
| [InlineData("claude-haiku-4.5")] | ||
| [InlineData("gemini-3-pro")] | ||
| public void GetStrengths_KnownModels_ReturnNonEmptyRegistryString(string slug) | ||
| { | ||
| var strengths = ModelCapabilities.GetStrengths(slug); | ||
| Assert.NotEmpty(strengths); | ||
| Assert.DoesNotContain("Unknown", strengths, StringComparison.OrdinalIgnoreCase); | ||
| Assert.DoesNotContain("Inferred", strengths, StringComparison.OrdinalIgnoreCase); | ||
|
Comment on lines
+2544
to
+2546
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟢 MINOR — Known-model strengths test uses only negative assertions (2/3 reviewers)
Suggestion (optional): If desired, add one keyword per model: e.g., |
||
| } | ||
|
|
||
| [Theory] | ||
| [InlineData("claude-opus-4.6-1m")] | ||
| [InlineData("claude-sonnet-4.5-preview")] | ||
| public void GetStrengths_FuzzyMatchedModels_ReturnRegistryStrengths(string slug) | ||
| { | ||
| var strengths = ModelCapabilities.GetStrengths(slug); | ||
| Assert.NotEmpty(strengths); | ||
| Assert.DoesNotContain("Unknown", strengths, StringComparison.OrdinalIgnoreCase); | ||
| Assert.DoesNotContain("Inferred", strengths, StringComparison.OrdinalIgnoreCase); | ||
|
Comment on lines
+2554
to
+2557
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟡 MODERATE — Fuzzy-match strengths test doesn't verify correct registry mapping (3/3 reviewers) These negative-only assertions ( Suggestion: Pin the fuzzy result to its expected canonical entry: Assert.Equal(ModelCapabilities.GetStrengths("claude-opus-4.6"), strengths);
// (and similarly for "claude-sonnet-4.5-preview" → "claude-sonnet-4.5")This mirrors the pattern already used in |
||
| } | ||
|
|
||
| [Theory] | ||
| [InlineData("claude-opus-99")] | ||
| [InlineData("claude-sonnet-10")] | ||
| [InlineData("gemini-5-pro")] | ||
| public void GetStrengths_InferredModels_ReturnInferredPrefix(string slug) | ||
| { | ||
| var strengths = ModelCapabilities.GetStrengths(slug); | ||
| Assert.StartsWith("Inferred:", strengths, StringComparison.OrdinalIgnoreCase); | ||
| Assert.True(strengths.Length > "Inferred:".Length, "Inferred strengths must contain content after the prefix"); | ||
| } | ||
|
|
||
| [Fact] | ||
| public void GetStrengths_InferredOpus_ContainsReasoningAndCode() | ||
| { | ||
| var strengths = ModelCapabilities.GetStrengths("claude-opus-99"); | ||
| Assert.Contains("reasoning", strengths, StringComparison.OrdinalIgnoreCase); | ||
| Assert.Contains("code", strengths, StringComparison.OrdinalIgnoreCase); | ||
| } | ||
|
|
||
| [Fact] | ||
| public void GetStrengths_InferredMini_ContainsFastAndCostEfficient() | ||
| { | ||
| var strengths = ModelCapabilities.GetStrengths("new-model-mini"); | ||
| Assert.StartsWith("Inferred:", strengths, StringComparison.OrdinalIgnoreCase); | ||
| Assert.Contains("fast", strengths, StringComparison.OrdinalIgnoreCase); | ||
| Assert.Contains("cost-efficient", strengths, StringComparison.OrdinalIgnoreCase); | ||
| } | ||
|
|
||
| [Fact] | ||
| public void GetStrengths_CompletelyUnknown_ReturnsUnknownModel() | ||
| { | ||
| var strengths = ModelCapabilities.GetStrengths("totally-unknown-xyz-abc"); | ||
| Assert.Equal("Unknown model", strengths); | ||
| } | ||
| } | ||
|
|
||
| public class GroupPresetTests | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🟡 MODERATE — Test cements a production code bug as "expected behaviour"
InferFromName(ModelCapabilities.cs:80) useslower.Contains("mini"), which falsely matches"ge**mini**". Calling this "expected production code behaviour" discourages fixing it, and any future fix to use word-boundary matching would break these assertions.Suggestion: Reword the comment to acknowledge this as a known limitation: