From 658bd54ecf83cea90e905d4f86189af08eaf43f7 Mon Sep 17 00:00:00 2001
From: Xaden Ryan <165437834+xadenryan@users.noreply.github.com>
Date: Thu, 12 Mar 2026 02:21:35 -0600
Subject: [PATCH] feat(llm-task): add thinking override

Co-authored-by: Xaden Ryan <165437834+xadenryan@users.noreply.github.com>
---
 CHANGELOG.md                                  |  1 +
 docs/tools/llm-task.md                        |  2 +
 docs/tools/lobster.md                         |  1 +
 extensions/llm-task/README.md                 |  1 +
 extensions/llm-task/src/llm-task-tool.test.ts | 53 +++++++++++++++++++
 extensions/llm-task/src/llm-task-tool.ts      | 22 +++++++-
 src/plugin-sdk/llm-task.ts                    |  6 +++
 7 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 92c8fe702..8551a0ccd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -51,6 +51,7 @@ Docs: https://docs.openclaw.ai
 - Gateway/node pending work: add narrow in-memory pending-work queue primitives (`node.pending.enqueue` / `node.pending.drain`) and wake-helper reuse as a foundation for dormant-node work delivery. (#41409) Thanks @mbelinky.
 - Git/runtime state: ignore the gateway-generated `.dev-state` file so local runtime state does not show up as untracked repo noise. (#41848) Thanks @smysle.
 - Exec/child commands: mark child command environments with `OPENCLAW_CLI` so subprocesses can detect when they were launched from the OpenClaw CLI. (#41411) Thanks @vincentkoc.
+- LLM Task/Lobster: add an optional `thinking` override so workflow calls can explicitly set the embedded reasoning level, with shared validation that rejects invalid values and unsupported `xhigh` modes. (#15606) Thanks @xadenryan and @ImLukeF.
 
 ### Breaking
 
diff --git a/docs/tools/llm-task.md b/docs/tools/llm-task.md
index e6f574d07..16de8230f 100644
--- a/docs/tools/llm-task.md
+++ b/docs/tools/llm-task.md
@@ -75,6 +75,7 @@ outside the list is rejected.
 - `schema` (object, optional JSON Schema)
 - `provider` (string, optional)
 - `model` (string, optional)
+- `thinking` (string, optional thinking level override, e.g. `low` or `high`; invalid levels are rejected)
 - `authProfileId` (string, optional)
 - `temperature` (number, optional)
 - `maxTokens` (number, optional)
@@ -90,6 +91,7 @@ Returns `details.json` containing the parsed JSON (and validates against
 ```lobster
 openclaw.invoke --tool llm-task --action json --args-json '{
   "prompt": "Given the input email, return intent and draft.",
+  "thinking": "low",
   "input": {
     "subject": "Hello",
     "body": "Can you help?"
diff --git a/docs/tools/lobster.md b/docs/tools/lobster.md
index 65ff4f56d..5c8a47e4d 100644
--- a/docs/tools/lobster.md
+++ b/docs/tools/lobster.md
@@ -106,6 +106,7 @@ Use it in a pipeline:
 ```lobster
 openclaw.invoke --tool llm-task --action json --args-json '{
   "prompt": "Given the input email, return intent and draft.",
+  "thinking": "low",
   "input": { "subject": "Hello", "body": "Can you help?" },
   "schema": {
     "type": "object",
diff --git a/extensions/llm-task/README.md b/extensions/llm-task/README.md
index d8e5dadc6..738208f3d 100644
--- a/extensions/llm-task/README.md
+++ b/extensions/llm-task/README.md
@@ -69,6 +69,7 @@ outside the list is rejected.
 - `schema` (object, optional JSON Schema)
 - `provider` (string, optional)
 - `model` (string, optional)
+- `thinking` (string, optional thinking level override, e.g. `low` or `high`; invalid levels are rejected)
 - `authProfileId` (string, optional)
 - `temperature` (number, optional)
 - `maxTokens` (number, optional)
diff --git a/extensions/llm-task/src/llm-task-tool.test.ts b/extensions/llm-task/src/llm-task-tool.test.ts
index fea135e8b..fc9f0e072 100644
--- a/extensions/llm-task/src/llm-task-tool.test.ts
+++ b/extensions/llm-task/src/llm-task-tool.test.ts
@@ -109,6 +109,59 @@ describe("llm-task tool (json-only)", () => {
     expect(call.model).toBe("claude-4-sonnet");
   });
 
+  it("passes thinking override to embedded runner", async () => {
+    // oxlint-disable-next-line typescript/no-explicit-any
+    (runEmbeddedPiAgent as any).mockResolvedValueOnce({
+      meta: {},
+      payloads: [{ text: JSON.stringify({ ok: true }) }],
+    });
+    const tool = createLlmTaskTool(fakeApi());
+    await tool.execute("id", { prompt: "x", thinking: "high" });
+    // oxlint-disable-next-line typescript/no-explicit-any
+    const call = (runEmbeddedPiAgent as any).mock.calls[0]?.[0];
+    expect(call.thinkLevel).toBe("high");
+  });
+
+  it("normalizes thinking aliases", async () => {
+    // oxlint-disable-next-line typescript/no-explicit-any
+    (runEmbeddedPiAgent as any).mockResolvedValueOnce({
+      meta: {},
+      payloads: [{ text: JSON.stringify({ ok: true }) }],
+    });
+    const tool = createLlmTaskTool(fakeApi());
+    await tool.execute("id", { prompt: "x", thinking: "on" });
+    // oxlint-disable-next-line typescript/no-explicit-any
+    const call = (runEmbeddedPiAgent as any).mock.calls[0]?.[0];
+    expect(call.thinkLevel).toBe("low");
+  });
+
+  it("throws on invalid thinking level", async () => {
+    const tool = createLlmTaskTool(fakeApi());
+    await expect(tool.execute("id", { prompt: "x", thinking: "banana" })).rejects.toThrow(
+      /invalid thinking level/i,
+    );
+  });
+
+  it("throws on unsupported xhigh thinking level", async () => {
+    const tool = createLlmTaskTool(fakeApi());
+    await expect(tool.execute("id", { prompt: "x", thinking: "xhigh" })).rejects.toThrow(
+      /only supported/i,
+    );
+  });
+
+  it("does not pass thinkLevel when thinking is omitted", async () => {
+    // oxlint-disable-next-line typescript/no-explicit-any
+    (runEmbeddedPiAgent as any).mockResolvedValueOnce({
+      meta: {},
+      payloads: [{ text: JSON.stringify({ ok: true }) }],
+    });
+    const tool = createLlmTaskTool(fakeApi());
+    await tool.execute("id", { prompt: "x" });
+    // oxlint-disable-next-line typescript/no-explicit-any
+    const call = (runEmbeddedPiAgent as any).mock.calls[0]?.[0];
+    expect(call.thinkLevel).toBeUndefined();
+  });
+
   it("enforces allowedModels", async () => {
     // oxlint-disable-next-line typescript/no-explicit-any
     (runEmbeddedPiAgent as any).mockResolvedValueOnce({
diff --git a/extensions/llm-task/src/llm-task-tool.ts b/extensions/llm-task/src/llm-task-tool.ts
index 3a2e42c72..ff2037e53 100644
--- a/extensions/llm-task/src/llm-task-tool.ts
+++ b/extensions/llm-task/src/llm-task-tool.ts
@@ -2,7 +2,13 @@ import fs from "node:fs/promises";
 import path from "node:path";
 import { Type } from "@sinclair/typebox";
 import Ajv from "ajv";
-import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/llm-task";
+import {
+  formatThinkingLevels,
+  formatXHighModelHint,
+  normalizeThinkLevel,
+  resolvePreferredOpenClawTmpDir,
+  supportsXHighThinking,
+} from "openclaw/plugin-sdk/llm-task";
 // NOTE: This extension is intended to be bundled with OpenClaw.
 // When running from source (tests/dev), OpenClaw internals live under src/.
 // When running from a built install, internals live under dist/ (no src/ tree).
@@ -86,6 +92,7 @@ export function createLlmTaskTool(api: OpenClawPluginApi) {
         Type.String({ description: "Provider override (e.g. openai-codex, anthropic)." }),
       ),
       model: Type.Optional(Type.String({ description: "Model id override." })),
+      thinking: Type.Optional(Type.String({ description: "Thinking level override." })),
       authProfileId: Type.Optional(Type.String({ description: "Auth profile override." })),
       temperature: Type.Optional(Type.Number({ description: "Best-effort temperature override." })),
       maxTokens: Type.Optional(Type.Number({ description: "Best-effort maxTokens override." })),
@@ -144,6 +151,18 @@ export function createLlmTaskTool(api: OpenClawPluginApi) {
         );
       }
 
+      const thinkingRaw =
+        typeof params.thinking === "string" && params.thinking.trim() ? params.thinking : undefined;
+      const thinkLevel = thinkingRaw ? normalizeThinkLevel(thinkingRaw) : undefined;
+      if (thinkingRaw && !thinkLevel) {
+        throw new Error(
+          `Invalid thinking level "${thinkingRaw}". Use one of: ${formatThinkingLevels(provider, model)}.`,
+        );
+      }
+      if (thinkLevel === "xhigh" && !supportsXHighThinking(provider, model)) {
+        throw new Error(`Thinking level "xhigh" is only supported for ${formatXHighModelHint()}.`);
+      }
+
       const timeoutMs =
         (typeof params.timeoutMs === "number" && params.timeoutMs > 0
           ? params.timeoutMs
@@ -204,6 +223,7 @@ export function createLlmTaskTool(api: OpenClawPluginApi) {
           model,
           authProfileId,
           authProfileIdSource: authProfileId ? "user" : "auto",
+          thinkLevel,
           streamParams,
           disableTools: true,
         });
diff --git a/src/plugin-sdk/llm-task.ts b/src/plugin-sdk/llm-task.ts
index 164a28f04..c69e82f36 100644
--- a/src/plugin-sdk/llm-task.ts
+++ b/src/plugin-sdk/llm-task.ts
@@ -2,4 +2,10 @@
 // Keep this list additive and scoped to symbols used under extensions/llm-task.
 
 export { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
+export {
+  formatThinkingLevels,
+  formatXHighModelHint,
+  normalizeThinkLevel,
+  supportsXHighThinking,
+} from "../auto-reply/thinking.js";
 export type { AnyAgentTool, OpenClawPluginApi } from "../plugins/types.js";