@@ -16,23 +16,28 @@ public class CohereTokenizer
1616{
/// <summary>
/// Loaded tokenizers, keyed by the model name they were created for.
/// The constructor adds one entry per model it loads.
/// </summary>
1717 public Dictionary < string , TiktokenTokenizer > Tokenizers { get ; set ; } = new ( ) ;
1818
/// <summary>
/// Builds a tiktoken-based tokenizer for each requested Cohere model and
/// registers it in <see cref="Tokenizers"/> under the model name.
/// </summary>
/// <remarks>
/// For every model, a <c>{modelName}.tiktoken</c> file and a
/// <c>{modelName}.tiktoken.extra</c> file are expected in the application base
/// directory. The download helper is invoked first with the public Cohere
/// tokenizer definition URL for that model so that it can produce those files.
/// </remarks>
/// <param name="httpClientFactory">Factory passed through to the download helper.</param>
/// <param name="modelNames">
/// Names of the models to load. When <c>null</c>, only
/// <c>CohereModels.CommandDefault</c> is loaded. Duplicate names are loaded once.
/// </param>
/// <exception cref="ArgumentException">A model name is null, empty or whitespace.</exception>
/// <exception cref="InvalidOperationException">
/// The extra tokenizer data file of a model deserializes to <c>null</c>.
/// </exception>
public CohereTokenizer(IHttpClientFactory httpClientFactory, IEnumerable<string>? modelNames = null)
{
    // Materialize once so a lazy sequence is not enumerated multiple times.
    var models = modelNames?.ToArray() ?? new[] { CohereModels.CommandDefault };

    // Loop-invariant: all tokenizer files live next to the application binaries.
    var baseDirectory = AppDomain.CurrentDomain.BaseDirectory;

    foreach (var modelName in models)
    {
        // A blank name would silently produce ".tiktoken" / "/.json"; fail early instead.
        if (string.IsNullOrWhiteSpace(modelName))
        {
            throw new ArgumentException("Model names must not be null, empty or whitespace.", nameof(modelNames));
        }

        // The same model listed twice would only redo identical work.
        if (Tokenizers.ContainsKey(modelName))
        {
            continue;
        }

        var tokenizerFile = Path.Combine(baseDirectory, $"{modelName}.tiktoken");
        var tokenizerExtraFile = Path.Combine(baseDirectory, $"{modelName}.tiktoken.extra");

        DownloadCohereTokenizerSpecifcationFileAndConvertToTiktoken(
            httpClientFactory,
            $"https://storage.googleapis.com/cohere-public/tokenizers/{modelName}.json",
            tokenizerFile,
            tokenizerExtraFile);

        // Load the extra data first: it supplies the special tokens needed to create the tokenizer.
        var extraData = File.ReadAllText(tokenizerExtraFile);
        var ed = JsonSerializer.Deserialize<ExtraTokenizerData>(extraData)
            ?? throw new InvalidOperationException(
                $"Tokenizer extra data file '{tokenizerExtraFile}' does not contain valid data.");

        var tiktoken = TiktokenTokenizer.Create(tokenizerFile, null, null, specialTokens: ed.GetSpecialToken());
        Tokenizers[modelName] = tiktoken;
    }
}
3742
3843 private static void DownloadCohereTokenizerSpecifcationFileAndConvertToTiktoken (
@@ -41,7 +46,7 @@ private static void DownloadCohereTokenizerSpecifcationFileAndConvertToTiktoken(
4146 string tokenizerFile ,
4247 string tokenizerExtraFile )
4348 {
44- if ( ! File . Exists ( tokenizerFile ) )
49+ if ( ! File . Exists ( tokenizerFile ) )
4550 {
4651 var client = httpClientFactory . CreateClient ( ) ;
4752 var response = client . GetAsync ( definitionLocation ) . Result ;
0 commit comments