Skip to content

Commit f5e9858

Browse files
committed
Some fixes
1 parent e3073cc commit f5e9858

4 files changed

Lines changed: 35 additions & 25 deletions

File tree

src/KernelMemory.Extensions.ConsoleTest/Samples/CustomSearchPipelineBase.cs

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -453,18 +453,18 @@ private static IKernelBuilder CreateBasicKernelBuilder()
453453
.AddLogger(s => _loggingProvider.CreateHttpRequestBodyLogger(s.GetRequiredService<ILogger<DumpLoggingProvider>>())));
454454

455455
kernelBuilder.Services.AddAzureOpenAIChatCompletion(
456-
"GPT35_2",
456+
"gpt-4o-mini",
457457
Dotenv.Get("OPENAI_API_BASE") ?? throw new ConfigurationException("OPENAI_API_BASE missing from .env file"),
458458
Dotenv.Get("OPENAI_API_KEY") ?? throw new ConfigurationException("OPENAI_API_KEY missing from .env file"),
459-
serviceId: "gpt35",
460-
modelId: "gpt35");
459+
serviceId: "gpt-4o-mini",
460+
modelId: "gpt-4o-mini");
461461

462462
kernelBuilder.Services.AddAzureOpenAIChatCompletion(
463-
"GPT4o", //"GPT35_2",//"GPT42",
463+
"gpt-4o", //"GPT35_2",//"GPT42",
464464
Dotenv.Get("OPENAI_API_BASE") ?? throw new ConfigurationException("OPENAI_API_BASE missing from .env file"),
465465
Dotenv.Get("OPENAI_API_KEY") ?? throw new ConfigurationException("OPENAI_API_KEY missing from .env file"),
466466
serviceId: "default",
467-
modelId: "gpt4o");
467+
modelId: "gpt-4o");
468468

469469
return kernelBuilder;
470470
}

src/KernelMemory.Extensions.FunctionalTests/Cohere/CohereTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ public async Task Basic_cohere_embed_test()
139139
public void Tokenizer_raw_test()
140140
{
141141
CohereTokenizer tokenizer = new(_httpClientFactory);
142-
var count = tokenizer.CountToken("command-r-plus", "Now I'm using CommandR+ tokenizer");
142+
var count = tokenizer.CountToken(CohereModels.CommandDefault, "Now I'm using CommandR+ tokenizer");
143143
Assert.Equal(8, count);
144144
}
145145

src/KernelMemory.Extensions/Cohere/CohereTokenizer.cs

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,23 +16,28 @@ public class CohereTokenizer
1616
{
1717
public Dictionary<string, TiktokenTokenizer> Tokenizers { get; set; } = new();
1818

19-
public CohereTokenizer(IHttpClientFactory httpClientFactory)
20-
{
21-
var tokenizerFile = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "command-r-plus.tiktoken");
22-
var tokenizerExtraFile = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "command-r-plus.tiktoken.extra");
23-
24-
DownloadCohereTokenizerSpecifcationFileAndConvertToTiktoken(
25-
httpClientFactory,
26-
"https://storage.googleapis.com/cohere-public/tokenizers/command-r-plus.json",
27-
tokenizerFile,
28-
tokenizerExtraFile);
29-
30-
//now we need to load the tokenizer, first of all we load the extra data
31-
var extraData = File.ReadAllText(tokenizerExtraFile);
32-
var ed = JsonSerializer.Deserialize<ExtraTokenizerData>(extraData)!;
33-
34-
var tiktoken = TiktokenTokenizer.Create(tokenizerFile, null, null, specialTokens: ed.GetSpecialToken());
35-
Tokenizers["command-r-plus"] = tiktoken;
19+
public CohereTokenizer(IHttpClientFactory httpClientFactory, IEnumerable<string>? modelNames = null)
20+
{
21+
var models = modelNames?.ToArray() ?? new[] { CohereModels.CommandDefault };
22+
23+
foreach (var modelName in models)
24+
{
25+
var tokenizerFile = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, $"{modelName}.tiktoken");
26+
var tokenizerExtraFile = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, $"{modelName}.tiktoken.extra");
27+
28+
DownloadCohereTokenizerSpecifcationFileAndConvertToTiktoken(
29+
httpClientFactory,
30+
$"https://storage.googleapis.com/cohere-public/tokenizers/{modelName}.json",
31+
tokenizerFile,
32+
tokenizerExtraFile);
33+
34+
//now we need to load the tokenizer, first of all we load the extra data
35+
var extraData = File.ReadAllText(tokenizerExtraFile);
36+
var ed = JsonSerializer.Deserialize<ExtraTokenizerData>(extraData)!;
37+
38+
var tiktoken = TiktokenTokenizer.Create(tokenizerFile, null, null, specialTokens: ed.GetSpecialToken());
39+
Tokenizers[modelName] = tiktoken;
40+
}
3641
}
3742

3843
private static void DownloadCohereTokenizerSpecifcationFileAndConvertToTiktoken(
@@ -41,7 +46,7 @@ private static void DownloadCohereTokenizerSpecifcationFileAndConvertToTiktoken(
4146
string tokenizerFile,
4247
string tokenizerExtraFile)
4348
{
44-
if (!File.Exists(tokenizerFile))
49+
if (!File.Exists(tokenizerFile))
4550
{
4651
var client = httpClientFactory.CreateClient();
4752
var response = client.GetAsync(definitionLocation).Result;

src/KernelMemory.Extensions/Cohere/RawCohereClientDtos.cs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ internal string Describe()
9595
public string Message { get; set; }
9696

9797
[JsonPropertyName("model")]
98-
public string Model { get; set; } = "command-r-plus";
98+
public string Model { get; set; } = CohereModels.CommandDefault;
9999

100100
[JsonPropertyName("documents")]
101101
public List<RagDocument> Documents { get; set; }
@@ -425,6 +425,11 @@ public static class CohereModels
425425
public const string EmbedEnglishV2 = "embed-english-v2.0";
426426
public const string EmbedEnglishLightV2 = "embed-english-light-v2.0";
427427
public const string EmbedMultilingualV2 = "embed-multilingual-v2.0";
428+
// Command/chat family models
429+
430+
public const string CommandA032025 = "command-a-03-2025";
431+
// Default command model used by the RAG/Chat client
432+
public const string CommandDefault = CommandA032025;
428433
}
429434

430435
public class CohereEmbedRequest

0 commit comments

Comments
 (0)