fix(usage): parse Kimi K2 cached_tokens from prompt_tokens_details

Kimi K2 models use automatic prefix caching and return cache stats in a nested field: usage.prompt_tokens_details.cached_tokens. This fixes issue #7073, where cacheRead was showing 0 for K2.5 users. Also adds support for the top-level cached_tokens field returned by the moonshot-v1 explicit caching API. Closes #7073
2026-02-24 21:21:14 +08:00
parent b511a38fc8
commit aa2826b5b1
2 changed files with 45 additions and 1 deletions
--- a/src/agents/usage.test.ts
+++ b/src/agents/usage.test.ts
@@ -54,6 +54,40 @@ describe("normalizeUsage", () => {
    });
  });

+  it("handles Moonshot/Kimi cached_tokens field", () => {
+    // Moonshot v1 returns cached_tokens instead of cache_read_input_tokens
+    const usage = normalizeUsage({
+      prompt_tokens: 30,
+      completion_tokens: 9,
+      total_tokens: 39,
+      cached_tokens: 19,
+    });
+    expect(usage).toEqual({
+      input: 30,
+      output: 9,
+      cacheRead: 19,
+      cacheWrite: undefined,
+      total: 39,
+    });
+  });
+
+  it("handles Kimi K2 prompt_tokens_details.cached_tokens field", () => {
+    // Kimi K2 uses automatic prefix caching and returns cached_tokens in prompt_tokens_details
+    const usage = normalizeUsage({
+      prompt_tokens: 1113,
+      completion_tokens: 5,
+      total_tokens: 1118,
+      prompt_tokens_details: { cached_tokens: 1024 },
+    });
+    expect(usage).toEqual({
+      input: 1113,
+      output: 5,
+      cacheRead: 1024,
+      cacheWrite: undefined,
+      total: 1118,
+    });
+  });
+
  it("returns undefined when no valid fields are provided", () => {
    const usage = normalizeUsage(null);
    expect(usage).toBeUndefined();
--- a/src/agents/usage.ts
+++ b/src/agents/usage.ts
@@ -15,6 +15,10 @@ export type UsageLike = {
  completion_tokens?: number;
  cache_read_input_tokens?: number;
  cache_creation_input_tokens?: number;
+  // Moonshot/Kimi uses cached_tokens for cache read count (explicit caching API).
+  cached_tokens?: number;
+  // Kimi K2 uses prompt_tokens_details.cached_tokens for automatic prefix caching.
+  prompt_tokens_details?: { cached_tokens?: number };
  // Some agents/logs emit alternate naming.
  totalTokens?: number;
  total_tokens?: number;
@@ -64,7 +68,13 @@ export function normalizeUsage(raw?: UsageLike | null): NormalizedUsage | undefi
      raw.completionTokens ??
      raw.completion_tokens,
  );
-  const cacheRead = asFiniteNumber(raw.cacheRead ?? raw.cache_read ?? raw.cache_read_input_tokens);
+  const cacheRead = asFiniteNumber(
+    raw.cacheRead ??
+      raw.cache_read ??
+      raw.cache_read_input_tokens ??
+      raw.cached_tokens ??
+      raw.prompt_tokens_details?.cached_tokens,
+  );
  const cacheWrite = asFiniteNumber(
    raw.cacheWrite ?? raw.cache_write ?? raw.cache_creation_input_tokens,
  );