diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7ee7221de..21e4f7361 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -28,6 +28,8 @@ Docs: https://docs.openclaw.ai
 
 ### Fixes
 
+- OpenRouter/Anthropic: inject `cache_control` on system prompts for OpenRouter Anthropic models to improve prompt-cache reuse. (#17473) Thanks @rrenamed.
+
 - Providers/OpenRouter: allow pass-through OpenRouter and Opencode model IDs in live model filtering so custom routed model IDs are treated as modern refs. (#14312) Thanks @Joly0.
 - Providers/OpenRouter: default reasoning to enabled when the selected model advertises `reasoning: true` and no session/directive override is set. (#22513) Thanks @zwffff.
 - Providers/OpenRouter: map `/think` levels to `reasoning.effort` in embedded runs while preserving explicit `reasoning.max_tokens` payloads. (#17236) Thanks @robbyczgw-cla.
diff --git a/src/agents/pi-embedded-runner/extra-params.openrouter-cache-control.test.ts b/src/agents/pi-embedded-runner/extra-params.openrouter-cache-control.test.ts
new file mode 100644
index 000000000..1468df855
--- /dev/null
+++ b/src/agents/pi-embedded-runner/extra-params.openrouter-cache-control.test.ts
@@ -0,0 +1,99 @@
+import type { StreamFn } from "@mariozechner/pi-agent-core";
+import type { Context, Model } from "@mariozechner/pi-ai";
+import { AssistantMessageEventStream } from "@mariozechner/pi-ai";
+import { describe, expect, it } from "vitest";
+import { applyExtraParamsToAgent } from "./extra-params.js";
+
+describe("extra-params: OpenRouter Anthropic cache_control", () => {
+  it("injects cache_control into system message for OpenRouter Anthropic models", () => {
+    const payload = {
+      messages: [
+        { role: "system", content: "You are a helpful assistant." },
+        { role: "user", content: "Hello" },
+      ],
+    };
+    const baseStreamFn: StreamFn = (_model, _context, options) => {
+      options?.onPayload?.(payload);
+      return new AssistantMessageEventStream();
+    };
+    const agent = { streamFn: baseStreamFn };
+
+    applyExtraParamsToAgent(agent, undefined, "openrouter", "anthropic/claude-opus-4-6");
+
+    const model = {
+      api: "openai-completions",
+      provider: "openrouter",
+      id: "anthropic/claude-opus-4-6",
+    } as Model<"openai-completions">;
+    const context: Context = { messages: [] };
+
+    void agent.streamFn?.(model, context, {});
+
+    expect(payload.messages[0].content).toEqual([
+      { type: "text", text: "You are a helpful assistant.", cache_control: { type: "ephemeral" } },
+    ]);
+    expect(payload.messages[1].content).toBe("Hello");
+  });
+
+  it("adds cache_control to last content block when system message is already array", () => {
+    const payload = {
+      messages: [
+        {
+          role: "system",
+          content: [
+            { type: "text", text: "Part 1" },
+            { type: "text", text: "Part 2" },
+          ],
+        },
+      ],
+    };
+    const baseStreamFn: StreamFn = (_model, _context, options) => {
+      options?.onPayload?.(payload);
+      return new AssistantMessageEventStream();
+    };
+    const agent = { streamFn: baseStreamFn };
+
+    applyExtraParamsToAgent(agent, undefined, "openrouter", "anthropic/claude-opus-4-6");
+
+    const model = {
+      api: "openai-completions",
+      provider: "openrouter",
+      id: "anthropic/claude-opus-4-6",
+    } as Model<"openai-completions">;
+    const context: Context = { messages: [] };
+
+    void agent.streamFn?.(model, context, {});
+
+    const content = payload.messages[0].content as Array<Record<string, unknown>>;
+    expect(content[0]).toEqual({ type: "text", text: "Part 1" });
+    expect(content[1]).toEqual({
+      type: "text",
+      text: "Part 2",
+      cache_control: { type: "ephemeral" },
+    });
+  });
+
+  it("does not inject cache_control for OpenRouter non-Anthropic models", () => {
+    const payload = {
+      messages: [{ role: "system", content: "You are a helpful assistant." }],
+    };
+    const baseStreamFn: StreamFn = (_model, _context, options) => {
+      options?.onPayload?.(payload);
+      return new AssistantMessageEventStream();
+    };
+    const agent = { streamFn: baseStreamFn };
+
+    applyExtraParamsToAgent(agent, undefined, "openrouter", "google/gemini-3-pro");
+
+    const model = {
+      api: "openai-completions",
+      provider: "openrouter",
+      id: "google/gemini-3-pro",
+    } as Model<"openai-completions">;
+    const context: Context = { messages: [] };
+
+    void agent.streamFn?.(model, context, {});
+
+    expect(payload.messages[0].content).toBe("You are a helpful assistant.");
+  });
+});
diff --git a/src/agents/pi-embedded-runner/extra-params.ts b/src/agents/pi-embedded-runner/extra-params.ts
index df0812c67..3ae690c94 100644
--- a/src/agents/pi-embedded-runner/extra-params.ts
+++ b/src/agents/pi-embedded-runner/extra-params.ts
@@ -290,6 +290,69 @@ function createAnthropicBetaHeadersWrapper(
   };
 }
 
+/**
+ * Returns true when `provider` is "openrouter" and `modelId` starts with
+ * "anthropic/". Both comparisons are case-insensitive.
+ */
+function isOpenRouterAnthropicModel(provider: string, modelId: string): boolean {
+  return provider.toLowerCase() === "openrouter" && modelId.toLowerCase().startsWith("anthropic/");
+}
+
+/** Minimal view of a message in the outgoing payload; only `role` and `content` are inspected. */
+type PayloadMessage = {
+  role?: string;
+  content?: unknown;
+};
+
+/**
+ * Inject cache_control into the system message for OpenRouter Anthropic models.
+ * OpenRouter passes through Anthropic's cache_control field — caching the system
+ * prompt avoids re-processing it on every request.
+ *
+ * Every `system` or `developer` message in the outgoing payload is mutated in
+ * place before the caller's own `onPayload` hook runs: string content becomes a
+ * single text block carrying the marker, and non-empty array content gets the
+ * marker on its last block. Calls for any other model are passed through untouched.
+ */
+function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
+  const underlying = baseStreamFn ?? streamSimple;
+  return (model, context, options) => {
+    if (
+      typeof model.provider !== "string" ||
+      typeof model.id !== "string" ||
+      !isOpenRouterAnthropicModel(model.provider, model.id)
+    ) {
+      return underlying(model, context, options);
+    }
+
+    const originalOnPayload = options?.onPayload;
+    return underlying(model, context, {
+      ...options,
+      onPayload: (payload) => {
+        const messages = (payload as Record<string, unknown>)?.messages;
+        if (Array.isArray(messages)) {
+          for (const msg of messages as PayloadMessage[]) {
+            if (msg.role !== "system" && msg.role !== "developer") {
+              continue;
+            }
+            if (typeof msg.content === "string") {
+              msg.content = [
+                { type: "text", text: msg.content, cache_control: { type: "ephemeral" } },
+              ];
+            } else if (Array.isArray(msg.content) && msg.content.length > 0) {
+              const last = msg.content[msg.content.length - 1];
+              if (last && typeof last === "object") {
+                (last as Record<string, unknown>).cache_control = { type: "ephemeral" };
+              }
+            }
+          }
+        }
+        originalOnPayload?.(payload);
+      },
+    });
+  };
+}
+
 /**
  * Map OpenClaw's ThinkLevel to OpenRouter's reasoning.effort values.
  * "off" maps to "none"; all other levels pass through as-is.
@@ -426,6 +489,7 @@ export function applyExtraParamsToAgent(
   if (provider === "openrouter") {
     log.debug(`applying OpenRouter app attribution headers for ${provider}/${modelId}`);
     agent.streamFn = createOpenRouterWrapper(agent.streamFn, thinkingLevel);
+    agent.streamFn = createOpenRouterSystemCacheWrapper(agent.streamFn);
   }
 
   // Enable Z.AI tool_stream for real-time tool call streaming.