From 658bd54ecf83cea90e905d4f86189af08eaf43f7 Mon Sep 17 00:00:00 2001 From: Xaden Ryan <165437834+xadenryan@users.noreply.github.com> Date: Thu, 12 Mar 2026 02:21:35 -0600 Subject: [PATCH] feat(llm-task): add thinking override Co-authored-by: Xaden Ryan <165437834+xadenryan@users.noreply.github.com> --- CHANGELOG.md | 1 + docs/tools/llm-task.md | 2 + docs/tools/lobster.md | 1 + extensions/llm-task/README.md | 1 + extensions/llm-task/src/llm-task-tool.test.ts | 53 +++++++++++++++++++ extensions/llm-task/src/llm-task-tool.ts | 22 +++++++- src/plugin-sdk/llm-task.ts | 6 +++ 7 files changed, 85 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 92c8fe702..8551a0ccd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,7 @@ Docs: https://docs.openclaw.ai - Gateway/node pending work: add narrow in-memory pending-work queue primitives (`node.pending.enqueue` / `node.pending.drain`) and wake-helper reuse as a foundation for dormant-node work delivery. (#41409) Thanks @mbelinky. - Git/runtime state: ignore the gateway-generated `.dev-state` file so local runtime state does not show up as untracked repo noise. (#41848) Thanks @smysle. - Exec/child commands: mark child command environments with `OPENCLAW_CLI` so subprocesses can detect when they were launched from the OpenClaw CLI. (#41411) Thanks @vincentkoc. +- LLM Task/Lobster: add an optional `thinking` override so workflow calls can explicitly set embedded reasoning level with shared validation for invalid values and unsupported `xhigh` modes. (#15606) Thanks @xadenryan and @ImLukeF. ### Breaking diff --git a/docs/tools/llm-task.md b/docs/tools/llm-task.md index e6f574d07..16de8230f 100644 --- a/docs/tools/llm-task.md +++ b/docs/tools/llm-task.md @@ -75,6 +75,7 @@ outside the list is rejected. 
- `schema` (object, optional JSON Schema) - `provider` (string, optional) - `model` (string, optional) +- `thinking` (string, optional reasoning level such as `low` or `high`; invalid values are rejected and `xhigh` requires a supporting model) - `authProfileId` (string, optional) - `temperature` (number, optional) - `maxTokens` (number, optional) @@ -90,6 +91,7 @@ Returns `details.json` containing the parsed JSON (and validates against ```lobster openclaw.invoke --tool llm-task --action json --args-json '{ "prompt": "Given the input email, return intent and draft.", + "thinking": "low", "input": { "subject": "Hello", "body": "Can you help?" diff --git a/docs/tools/lobster.md b/docs/tools/lobster.md index 65ff4f56d..5c8a47e4d 100644 --- a/docs/tools/lobster.md +++ b/docs/tools/lobster.md @@ -106,6 +106,7 @@ Use it in a pipeline: ```lobster openclaw.invoke --tool llm-task --action json --args-json '{ "prompt": "Given the input email, return intent and draft.", + "thinking": "low", "input": { "subject": "Hello", "body": "Can you help?" }, "schema": { "type": "object", diff --git a/extensions/llm-task/README.md b/extensions/llm-task/README.md index d8e5dadc6..738208f3d 100644 --- a/extensions/llm-task/README.md +++ b/extensions/llm-task/README.md @@ -69,6 +69,7 @@ outside the list is rejected. 
- `schema` (object, optional JSON Schema) - `provider` (string, optional) - `model` (string, optional) +- `thinking` (string, optional reasoning level such as `low` or `high`; invalid values are rejected and `xhigh` requires a supporting model) - `authProfileId` (string, optional) - `temperature` (number, optional) - `maxTokens` (number, optional) diff --git a/extensions/llm-task/src/llm-task-tool.test.ts b/extensions/llm-task/src/llm-task-tool.test.ts index fea135e8b..fc9f0e072 100644 --- a/extensions/llm-task/src/llm-task-tool.test.ts +++ b/extensions/llm-task/src/llm-task-tool.test.ts @@ -109,6 +109,59 @@ describe("llm-task tool (json-only)", () => { expect(call.model).toBe("claude-4-sonnet"); }); + it("passes thinking override to embedded runner", async () => { + // oxlint-disable-next-line typescript/no-explicit-any + (runEmbeddedPiAgent as any).mockResolvedValueOnce({ + meta: {}, + payloads: [{ text: JSON.stringify({ ok: true }) }], + }); + const tool = createLlmTaskTool(fakeApi()); + await tool.execute("id", { prompt: "x", thinking: "high" }); + // oxlint-disable-next-line typescript/no-explicit-any + const call = (runEmbeddedPiAgent as any).mock.calls[0]?.[0]; + expect(call.thinkLevel).toBe("high"); + }); + + it("normalizes thinking aliases", async () => { + // oxlint-disable-next-line typescript/no-explicit-any + (runEmbeddedPiAgent as any).mockResolvedValueOnce({ + meta: {}, + payloads: [{ text: JSON.stringify({ ok: true }) }], + }); + const tool = createLlmTaskTool(fakeApi()); + await tool.execute("id", { prompt: "x", thinking: "on" }); + // oxlint-disable-next-line typescript/no-explicit-any + const call = (runEmbeddedPiAgent as any).mock.calls[0]?.[0]; + expect(call.thinkLevel).toBe("low"); + }); + + it("throws on invalid thinking level", async () => { + const tool = createLlmTaskTool(fakeApi()); + await expect(tool.execute("id", { prompt: "x", thinking: "banana" })).rejects.toThrow( + /invalid thinking level/i, + ); + }); + + it("throws on unsupported xhigh thinking level", async () => { + const tool = createLlmTaskTool(fakeApi()); + await 
expect(tool.execute("id", { prompt: "x", thinking: "xhigh" })).rejects.toThrow( + /only supported/i, + ); + }); + + it("does not pass thinkLevel when thinking is omitted", async () => { + // oxlint-disable-next-line typescript/no-explicit-any + (runEmbeddedPiAgent as any).mockResolvedValueOnce({ + meta: {}, + payloads: [{ text: JSON.stringify({ ok: true }) }], + }); + const tool = createLlmTaskTool(fakeApi()); + await tool.execute("id", { prompt: "x" }); + // oxlint-disable-next-line typescript/no-explicit-any + const call = (runEmbeddedPiAgent as any).mock.calls[0]?.[0]; + expect(call.thinkLevel).toBeUndefined(); + }); + it("enforces allowedModels", async () => { // oxlint-disable-next-line typescript/no-explicit-any (runEmbeddedPiAgent as any).mockResolvedValueOnce({ diff --git a/extensions/llm-task/src/llm-task-tool.ts b/extensions/llm-task/src/llm-task-tool.ts index 3a2e42c72..ff2037e53 100644 --- a/extensions/llm-task/src/llm-task-tool.ts +++ b/extensions/llm-task/src/llm-task-tool.ts @@ -2,7 +2,13 @@ import fs from "node:fs/promises"; import path from "node:path"; import { Type } from "@sinclair/typebox"; import Ajv from "ajv"; -import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/llm-task"; +import { + formatThinkingLevels, + formatXHighModelHint, + normalizeThinkLevel, + resolvePreferredOpenClawTmpDir, + supportsXHighThinking, +} from "openclaw/plugin-sdk/llm-task"; // NOTE: This extension is intended to be bundled with OpenClaw. // When running from source (tests/dev), OpenClaw internals live under src/. // When running from a built install, internals live under dist/ (no src/ tree). @@ -86,6 +92,7 @@ export function createLlmTaskTool(api: OpenClawPluginApi) { Type.String({ description: "Provider override (e.g. openai-codex, anthropic)." }), ), model: Type.Optional(Type.String({ description: "Model id override." })), + thinking: Type.Optional(Type.String({ description: "Thinking level override." 
})), authProfileId: Type.Optional(Type.String({ description: "Auth profile override." })), temperature: Type.Optional(Type.Number({ description: "Best-effort temperature override." })), maxTokens: Type.Optional(Type.Number({ description: "Best-effort maxTokens override." })), @@ -144,6 +151,18 @@ export function createLlmTaskTool(api: OpenClawPluginApi) { ); } + const thinkingRaw = + typeof params.thinking === "string" && params.thinking.trim() ? params.thinking : undefined; + const thinkLevel = thinkingRaw ? normalizeThinkLevel(thinkingRaw) : undefined; + if (thinkingRaw && !thinkLevel) { + throw new Error( + `Invalid thinking level "${thinkingRaw}". Use one of: ${formatThinkingLevels(provider, model)}.`, + ); + } + if (thinkLevel === "xhigh" && !supportsXHighThinking(provider, model)) { + throw new Error(`Thinking level "xhigh" is only supported for ${formatXHighModelHint()}.`); + } + const timeoutMs = (typeof params.timeoutMs === "number" && params.timeoutMs > 0 ? params.timeoutMs @@ -204,6 +223,7 @@ export function createLlmTaskTool(api: OpenClawPluginApi) { model, authProfileId, authProfileIdSource: authProfileId ? "user" : "auto", + thinkLevel, streamParams, disableTools: true, }); diff --git a/src/plugin-sdk/llm-task.ts b/src/plugin-sdk/llm-task.ts index 164a28f04..c69e82f36 100644 --- a/src/plugin-sdk/llm-task.ts +++ b/src/plugin-sdk/llm-task.ts @@ -2,4 +2,10 @@ // Keep this list additive and scoped to symbols used under extensions/llm-task. export { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js"; +export { + formatThinkingLevels, + formatXHighModelHint, + normalizeThinkLevel, + supportsXHighThinking, +} from "../auto-reply/thinking.js"; export type { AnyAgentTool, OpenClawPluginApi } from "../plugins/types.js";