
Commit 7659c02

fix(embeddings): add text truncation and fix lazy loading for Ollama provider
- Add context window-aware text truncation to prevent API errors
- Implement conservative 2 chars/token ratio for code truncation
- Fix eager transformers loading that caused hangs with Ollama
- Move MODEL_CONFIGS inline to avoid importing heavy transformers module
- Add support for model-specific context windows (nomic-embed-text, mxbai, etc.)
1 parent 75d66d3 commit 7659c02

2 files changed, 56 additions & 17 deletions
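
Before the per-file diffs, a minimal usage sketch of the behavior this commit fixes. It assumes `getEmbeddingProvider` accepts a `Partial<EmbeddingConfig>` (as `getConfiguredDimensions` does below); the long input string is illustrative:

```ts
import { getEmbeddingProvider } from './src/embeddings/index.js';

// Selecting Ollama no longer touches the transformers package at all:
// after this commit it is loaded only via dynamic import on the default
// path, so this call cannot hang on eager transformers initialization.
const provider = await getEmbeddingProvider({
  provider: 'ollama',
  model: 'nomic-embed-text'
});

// Inputs longer than the model's character budget
// (2048 tokens * 2 chars/token = 4096 chars for nomic-embed-text)
// are truncated before being sent, instead of failing the API call.
const vectors = await provider.embedBatch(['...very long code chunk...']);
```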


src/embeddings/index.ts

20 additions & 11 deletions

```diff
@@ -1,5 +1,4 @@
 export * from './types.js';
-export * from './transformers.js';
 
 import {
   EmbeddingProvider,
@@ -8,14 +7,22 @@ import {
   DEFAULT_MODEL,
   parseEmbeddingProviderName
 } from './types.js';
-import { TransformersEmbeddingProvider, MODEL_CONFIGS } from './transformers.js';
+
+// Model configs for dimension lookups (sync, no heavy dependencies)
+// This avoids loading the full transformers module at import time
+const TRANSFORMERS_MODEL_CONFIGS: Record<string, { dimensions: number; maxContext: number }> = {
+  'Xenova/bge-small-en-v1.5': { dimensions: 384, maxContext: 512 },
+  'Xenova/all-MiniLM-L6-v2': { dimensions: 384, maxContext: 512 },
+  'Xenova/bge-base-en-v1.5': { dimensions: 768, maxContext: 512 },
+  'onnx-community/granite-embedding-small-english-r2-ONNX': { dimensions: 384, maxContext: 8192 }
+};
 
 /**
  * Returns expected embedding dimensions for a given config without initializing any provider.
  * Used for LanceDB dimension validation before committing to an incremental update.
  *
- * Looks up dimensions from MODEL_CONFIGS (the authoritative source shared with the provider
- * implementation) so new models are automatically handled without updating this function.
+ * Looks up dimensions from TRANSFORMERS_MODEL_CONFIGS for local models and handles
+ * remote providers (OpenAI, Ollama) with their specific dimension logic.
  */
 export function getConfiguredDimensions(config: Partial<EmbeddingConfig> = {}): number {
   const provider =
@@ -30,12 +37,12 @@ export function getConfiguredDimensions(config: Partial<EmbeddingConfig> = {}):
       'mxbai-embed-large': 1024,
       'mxbai-embed-large:latest': 1024,
       'all-minilm': 384,
-      'all-minilm:latest': 384,
+      'all-minilm:latest': 384
     };
     return ollamaDimensions[model] || 768;
   }
-  // Look up from the same MODEL_CONFIGS the provider uses — avoids stale hardcoded guesses
-  return MODEL_CONFIGS[model]?.dimensions ?? 384;
+  // Look up from the local config for transformers provider
+  return TRANSFORMERS_MODEL_CONFIGS[model]?.dimensions ?? 384;
 }
 
 let cachedProvider: EmbeddingProvider | null = null;
@@ -64,10 +71,6 @@ export async function getEmbeddingProvider(
     return provider;
   }
 
-  if (mergedConfig.provider === 'custom') {
-    throw new Error("Custom provider not implemented. Use 'openai' or 'transformers'.");
-  }
-
   if (mergedConfig.provider === 'ollama') {
     const { OllamaEmbeddingProvider } = await import('./ollama.js');
     const provider = new OllamaEmbeddingProvider(
@@ -80,10 +83,16 @@
     return provider;
   }
 
+  // Default: transformers (lazy loaded)
+  const { TransformersEmbeddingProvider } = await import('./transformers.js');
   const provider = new TransformersEmbeddingProvider(mergedConfig.model);
   await provider.initialize();
   cachedProvider = provider;
   cachedProviderType = providerKey;
 
   return provider;
 }
+
+// Re-export TransformersEmbeddingProvider and MODEL_CONFIGS for consumers who need them
+// These will trigger transformers loading, but only when explicitly imported
+export { TransformersEmbeddingProvider, MODEL_CONFIGS } from './transformers.js';
```
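
A note on the design choice: inlining the config table keeps `getConfiguredDimensions` synchronous and import-cheap. A sketch of the LanceDB validation use the docstring mentions (the stored-schema read is hypothetical, not part of this commit):

```ts
import { getConfiguredDimensions } from './src/embeddings/index.js';

// Dimensions the current config would produce, without initializing a provider
const expected = getConfiguredDimensions({
  provider: 'ollama',
  model: 'mxbai-embed-large'
}); // 1024

// Hypothetical: vector-column width read from the existing LanceDB table
const existing = 768;

if (existing !== expected) {
  // The embedding model changed since the index was built; an incremental
  // update would mix vector sizes, so force a full re-index instead.
  throw new Error(
    `Embedding dimensions changed (${existing} -> ${expected}); full re-index required`
  );
}
```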

src/embeddings/ollama.ts

36 additions & 6 deletions

```diff
@@ -4,14 +4,32 @@ interface OllamaEmbeddingResponse {
   embedding: number[];
 }
 
+// Context window sizes for common Ollama embedding models (in tokens)
+const MODEL_CONTEXT_WINDOWS: Record<string, number> = {
+  'nomic-embed-text': 2048,
+  'nomic-embed-text:latest': 2048,
+  'mxbai-embed-large': 512,
+  'mxbai-embed-large:latest': 512,
+  'all-minilm': 512,
+  'all-minilm:latest': 512
+};
+
+// Conservative character limit (approx 2 chars per token for code)
+// Code has more tokens per character due to punctuation and symbols
+function getMaxChars(modelName: string): number {
+  const tokens = MODEL_CONTEXT_WINDOWS[modelName] || 2048;
+  return tokens * 2; // Very conservative: 2 chars per token
+}
+
 /**
  * Ollama Embedding Provider
  * Supports local embedding models via Ollama API.
  * API endpoint: POST /api/embeddings
  */
 export class OllamaEmbeddingProvider implements EmbeddingProvider {
   readonly name = 'ollama';
-
+  private maxChars: number;
+
   // Default dimensions for nomic-embed-text (768)
   // Override via EMBEDDING_MODEL env var for other models
   get dimensions(): number {
@@ -22,15 +40,17 @@ export class OllamaEmbeddingProvider implements EmbeddingProvider {
       'mxbai-embed-large': 1024,
       'mxbai-embed-large:latest': 1024,
       'all-minilm': 384,
-      'all-minilm:latest': 384,
+      'all-minilm:latest': 384
     };
     return modelDimensions[this.modelName] || 768;
   }
 
   constructor(
     readonly modelName: string = 'nomic-embed-text',
     private apiEndpoint: string = 'http://localhost:11434'
-  ) {}
+  ) {
+    this.maxChars = getMaxChars(modelName);
+  }
 
   async initialize(): Promise<void> {
     // Ollama doesn't require an API key
@@ -42,6 +62,13 @@ export class OllamaEmbeddingProvider implements EmbeddingProvider {
     return true;
   }
 
+  private truncateText(text: string): string {
+    if (text.length <= this.maxChars) {
+      return text;
+    }
+    return text.slice(0, this.maxChars);
+  }
+
   async embed(text: string): Promise<number[]> {
     const batch = await this.embedBatch([text]);
     return batch[0];
@@ -55,15 +82,18 @@ export class OllamaEmbeddingProvider implements EmbeddingProvider {
     // Ollama embeddings API processes one text at a time
     for (const text of texts) {
       try {
+        // Truncate text to fit within model's context window
+        const truncatedText = this.truncateText(text);
+
         const response = await fetch(`${this.apiEndpoint}/api/embeddings`, {
           method: 'POST',
           headers: {
-            'Content-Type': 'application/json',
+            'Content-Type': 'application/json'
           },
           body: JSON.stringify({
             model: this.modelName,
-            prompt: text,
-          }),
+            prompt: truncatedText
+          })
         });
 
         if (!response.ok) {
```
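
The 2 chars/token ratio is deliberately tighter than the roughly 4 chars/token typical of English prose, since code tokenizes more densely. A sketch of the budgets it yields, assuming a local Ollama server with the model pulled:

```ts
import { OllamaEmbeddingProvider } from './src/embeddings/ollama.js';

// Budgets implied by getMaxChars:
//   nomic-embed-text: 2048 tokens * 2 = 4096 chars
//   mxbai-embed-large / all-minilm: 512 tokens * 2 = 1024 chars
//   unrecognized models fall back to 2048 tokens * 2 = 4096 chars
const provider = new OllamaEmbeddingProvider('mxbai-embed-large');

// An 8000-char chunk is silently sliced to 1024 chars before the request,
// so the prompt stays inside the model's 512-token context window.
const vector = await provider.embed('x'.repeat(8000));
console.log(vector.length); // 1024 dimensions for mxbai-embed-large
```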
