11export * from './types.js' ;
2- export * from './transformers.js' ;
32
43import {
54 EmbeddingProvider ,
@@ -8,14 +7,22 @@ import {
87 DEFAULT_MODEL ,
98 parseEmbeddingProviderName
109} from './types.js' ;
11- import { TransformersEmbeddingProvider , MODEL_CONFIGS } from './transformers.js' ;
10+
11+ // Model configs for dimension lookups (sync, no heavy dependencies)
12+ // This avoids loading the full transformers module at import time
13+ const TRANSFORMERS_MODEL_CONFIGS : Record < string , { dimensions : number ; maxContext : number } > = {
14+ 'Xenova/bge-small-en-v1.5' : { dimensions : 384 , maxContext : 512 } ,
15+ 'Xenova/all-MiniLM-L6-v2' : { dimensions : 384 , maxContext : 512 } ,
16+ 'Xenova/bge-base-en-v1.5' : { dimensions : 768 , maxContext : 512 } ,
17+ 'onnx-community/granite-embedding-small-english-r2-ONNX' : { dimensions : 384 , maxContext : 8192 }
18+ } ;
1219
1320/**
1421 * Returns expected embedding dimensions for a given config without initializing any provider.
1522 * Used for LanceDB dimension validation before committing to an incremental update.
1623 *
17- * Looks up dimensions from MODEL_CONFIGS (the authoritative source shared with the provider
18- * implementation) so new models are automatically handled without updating this function .
24+ * Looks up dimensions from TRANSFORMERS_MODEL_CONFIGS for local models and handles
25+ * remote providers (OpenAI, Ollama) with their specific dimension logic .
1926 */
2027export function getConfiguredDimensions ( config : Partial < EmbeddingConfig > = { } ) : number {
2128 const provider =
@@ -30,12 +37,12 @@ export function getConfiguredDimensions(config: Partial<EmbeddingConfig> = {}):
3037 'mxbai-embed-large' : 1024 ,
3138 'mxbai-embed-large:latest' : 1024 ,
3239 'all-minilm' : 384 ,
33- 'all-minilm:latest' : 384 ,
40+ 'all-minilm:latest' : 384
3441 } ;
3542 return ollamaDimensions [ model ] || 768 ;
3643 }
37- // Look up from the same MODEL_CONFIGS the provider uses — avoids stale hardcoded guesses
38- return MODEL_CONFIGS [ model ] ?. dimensions ?? 384 ;
44+ // Look up from the local config for transformers provider
45+ return TRANSFORMERS_MODEL_CONFIGS [ model ] ?. dimensions ?? 384 ;
3946}
4047
4148let cachedProvider : EmbeddingProvider | null = null ;
@@ -64,10 +71,6 @@ export async function getEmbeddingProvider(
6471 return provider ;
6572 }
6673
67- if ( mergedConfig . provider === 'custom' ) {
68- throw new Error ( "Custom provider not implemented. Use 'openai' or 'transformers'." ) ;
69- }
70-
7174 if ( mergedConfig . provider === 'ollama' ) {
7275 const { OllamaEmbeddingProvider } = await import ( './ollama.js' ) ;
7376 const provider = new OllamaEmbeddingProvider (
@@ -80,10 +83,16 @@ export async function getEmbeddingProvider(
8083 return provider ;
8184 }
8285
86+ // Default: transformers (lazy loaded)
87+ const { TransformersEmbeddingProvider } = await import ( './transformers.js' ) ;
8388 const provider = new TransformersEmbeddingProvider ( mergedConfig . model ) ;
8489 await provider . initialize ( ) ;
8590 cachedProvider = provider ;
8691 cachedProviderType = providerKey ;
8792
8893 return provider ;
8994}
95+
// Re-export TransformersEmbeddingProvider and MODEL_CONFIGS for consumers who need them.
// NOTE(review): a static `export ... from` is resolved eagerly under ES module
// semantics — Node loads './transformers.js' (and its heavy dependencies) as soon
// as THIS module is imported, not when these symbols are used. That defeats the
// lazy-loading goal of this change. Consider having consumers import these symbols
// directly from './transformers.js' instead, or removing this re-export.
export { TransformersEmbeddingProvider , MODEL_CONFIGS } from './transformers.js' ;
0 commit comments