constructive-io · pyramation · May 21, 2026 · May 21, 2026
diff --git a/graphile/graphile-llm/src/metering.ts b/graphile/graphile-llm/src/metering.ts
@@ -133,17 +133,21 @@ export interface InferenceLogEntry {
   actorId: string | null;
   model: string;
   provider: string | null;
-  requestType: 'embedding' | 'chat' | 'rag';
+  service: 'llm' | 'embedding' | 'tts' | 'stt' | 'ocr' | 'image_gen' | 'search' | 'compute';
+  operation: string;
   inputTokens: number;
   outputTokens: number;
   totalTokens: number;
+  cacheReadTokens: number | null;
+  cacheWriteTokens: number | null;
   latencyMs: number;
   ragEnabled: boolean;
   chunksRetrieved: number | null;
   embeddingModel: string | null;
   embeddingLatencyMs: number | null;
   status: 'success' | 'quota_exceeded' | 'provider_error' | 'timeout';
   errorType: string | null;
+  rawUsage: Record<string, unknown> | null;
 }
 
 /**
@@ -161,29 +165,33 @@ export async function logInferenceUsage(
   const { schema, tableName } = ctx.inferenceLog;
   const sql = `INSERT INTO "${schema}"."${tableName}" (
     database_id, entity_id, actor_id,
-    model, provider, request_type,
+    model, provider, service, operation,
     input_tokens, output_tokens, total_tokens,
+    cache_read_tokens, cache_write_tokens,
     latency_ms, rag_enabled, chunks_retrieved,
     embedding_model, embedding_latency_ms,
-    status, error_type
+    status, error_type, raw_usage
   ) VALUES (
     $1, $2, $3,
-    $4, $5, $6,
-    $7, $8, $9,
-    $10, $11, $12,
-    $13, $14,
-    $15, $16
+    $4, $5, $6, $7,
+    $8, $9, $10,
+    $11, $12,
+    $13, $14, $15,
+    $16, $17,
+    $18, $19, $20
   )`;
 
   try {
     await ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
       await pgClient.query(sql, [
         entry.databaseId, entry.entityId, entry.actorId,
-        entry.model, entry.provider, entry.requestType,
+        entry.model, entry.provider, entry.service, entry.operation,
         entry.inputTokens, entry.outputTokens, entry.totalTokens,
+        entry.cacheReadTokens, entry.cacheWriteTokens,
         entry.latencyMs, entry.ragEnabled, entry.chunksRetrieved,
         entry.embeddingModel, entry.embeddingLatencyMs,
         entry.status, entry.errorType,
+        entry.rawUsage ? JSON.stringify(entry.rawUsage) : null,
       ]);
     });
   } catch (e: unknown) {
@@ -259,17 +267,21 @@ export async function meteredEmbed(
       actorId: ctx.actorId,
       model: options.embeddingModel ?? meterSlug,
       provider: options.provider ?? null,
-      requestType: 'embedding',
+      service: 'embedding',
+      operation: 'create',
       inputTokens: placeholderAmountTokens,
       outputTokens: 0,
       totalTokens: placeholderAmountTokens,
+      cacheReadTokens: null,
+      cacheWriteTokens: null,
       latencyMs: Date.now() - startTime,
       ragEnabled: false,
       chunksRetrieved: null,
       embeddingModel: options.embeddingModel ?? null,
       embeddingLatencyMs: null,
       status: 'quota_exceeded',
       errorType: null,
+      rawUsage: null,
     }).catch(() => {});
 
     return {
@@ -302,17 +314,21 @@ export async function meteredEmbed(
     actorId: ctx.actorId,
     model: options.embeddingModel ?? meterSlug,
     provider: options.provider ?? null,
-    requestType: 'embedding',
+    service: 'embedding',
+    operation: 'create',
     inputTokens: placeholderAmountTokens,
     outputTokens: 0,
     totalTokens: placeholderAmountTokens,
+    cacheReadTokens: null,
+    cacheWriteTokens: null,
     latencyMs,
     ragEnabled: false,
     chunksRetrieved: null,
     embeddingModel: options.embeddingModel ?? null,
     embeddingLatencyMs: latencyMs,
     status: 'success',
     errorType: null,
+    rawUsage: null,
   }).catch(() => {});
 
   return {
@@ -387,17 +403,21 @@ export async function meteredChat(
       actorId: ctx.actorId,
       model: meteringOptions.chatModel ?? meterSlug,
       provider: meteringOptions.provider ?? null,
-      requestType: 'chat',
+      service: 'llm',
+      operation: 'chat',
       inputTokens: placeholderInputTokens,
       outputTokens: 0,
       totalTokens: placeholderInputTokens,
+      cacheReadTokens: null,
+      cacheWriteTokens: null,
       latencyMs: Date.now() - startTime,
       ragEnabled: false,
       chunksRetrieved: null,
       embeddingModel: null,
       embeddingLatencyMs: null,
       status: 'quota_exceeded',
       errorType: null,
+      rawUsage: null,
     }).catch(() => {});
 
     return {
@@ -434,17 +454,21 @@ export async function meteredChat(
     actorId: ctx.actorId,
     model: meteringOptions.chatModel ?? meterSlug,
     provider: meteringOptions.provider ?? null,
-    requestType: 'chat',
+    service: 'llm',
+    operation: 'chat',
     inputTokens: placeholderInputTokens,
     outputTokens: placeholderOutputTokens,
     totalTokens: placeholderTotalTokens,
+    cacheReadTokens: null,
+    cacheWriteTokens: null,
     latencyMs,
     ragEnabled: false,
     chunksRetrieved: null,
     embeddingModel: null,
     embeddingLatencyMs: null,
     status: 'success',
     errorType: null,
+    rawUsage: null,
   }).catch(() => {});
 
   return {

diff --git a/graphql/server/src/middleware/llm-api.ts b/graphql/server/src/middleware/llm-api.ts
@@ -264,7 +264,8 @@ async function logInference(
     actorId: string;
     model: string;
     provider: string;
-    requestType: string;
+    service: string;
+    operation: string;
     inputTokens: number;
     outputTokens: number;
     totalTokens: number;
@@ -276,14 +277,15 @@ async function logInference(
     await withRlsClient(pool, pgSettings, async (client) => {
       await client.query(
         `INSERT INTO "${logInfo.schemaName}"."${logInfo.tableName}"
-         (entity_id, actor_id, model, provider, request_type, input_tokens, output_tokens, total_tokens, latency_ms, status)
-         VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)`,
+         (entity_id, actor_id, model, provider, service, operation, input_tokens, output_tokens, total_tokens, latency_ms, status)
+         VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)`,
         [
           data.entityId,
           data.actorId,
           data.model,
           data.provider,
-          data.requestType,
+          data.service,
+          data.operation,
           data.inputTokens,
           data.outputTokens,
           data.totalTokens,
@@ -572,7 +574,8 @@ async function handleSendMessage(
           actorId: userId,
           model,
           provider: 'ollama',
-          requestType: 'chat',
+          service: 'llm',
+          operation: 'chat',
           inputTokens,
           outputTokens,
           totalTokens,
@@ -635,7 +638,8 @@ async function handleSendMessage(
         actorId: userId,
         model,
         provider: 'ollama',
-        requestType: 'chat',
+        service: 'llm',
+        operation: 'chat',
         inputTokens,
         outputTokens,
         totalTokens,