From 10e6e274515a761b080e24ae243836de52172e3c Mon Sep 17 00:00:00 2001
From: Andyliu <andyliu@users.noreply.github.com>
Date: Wed, 11 Mar 2026 20:43:59 +0800
Subject: [PATCH] fix(models): guard optional model input capabilities 
 (#42096)

Merged via squash.

Prepared head SHA: d398fa0222b7045b549fd3592d469c079ca3efb6
Co-authored-by: andyliu <2377291+andyliu@users.noreply.github.com>
Co-authored-by: hydro13 <6640526+hydro13@users.noreply.github.com>
Reviewed-by: @hydro13
---
 CHANGELOG.md                                |  1 +
 src/agents/model-scan.ts                    |  6 ++--
 src/agents/pi-embedded-runner/model.test.ts | 36 +++++++++++++++++++++
 src/agents/pi-embedded-runner/model.ts      |  8 ++++-
 4 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 39928d6de..1211b3ace 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -96,6 +96,7 @@ Docs: https://docs.openclaw.ai
 - Telegram/final preview delivery followup: keep ambiguous missing-`message_id` finals only when a preview was already visible, while first-preview/no-id cases still fall back so Telegram users do not lose the final reply. (#41932) thanks @hougangdev.
 - Agents/Azure OpenAI Responses: include the `azure-openai` provider in the Responses API store override so Azure OpenAI multi-turn cron jobs and embedded agent runs no longer fail with HTTP 400 "store is set to false". (#42934, fixes #42800) Thanks @ademczuk.
 - Agents/context pruning: prune image-only tool results during soft-trim, align context-pruning coverage with the new tool-result contract, and extend historical image cleanup to the same screenshot-heavy session path. (#43045) Thanks @MoerAI.
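+- Agents/models: guard optional `model.input` capability checks so models whose catalog entry omits `input` no longer throw during image-capability checks and resolve to text-only input by default. (#42096) Thanks @andyliu.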
 
 ## 2026.3.8
 
diff --git a/src/agents/model-scan.ts b/src/agents/model-scan.ts
index a0f05e054..dec46b4db 100644
--- a/src/agents/model-scan.ts
+++ b/src/agents/model-scan.ts
@@ -326,12 +326,12 @@ async function probeImage(
 }
 
 function ensureImageInput(model: OpenAIModel): OpenAIModel {
-  if (model.input.includes("image")) {
+  if (model.input?.includes("image")) {
     return model;
   }
   return {
     ...model,
-    input: Array.from(new Set([...model.input, "image"])),
+    input: Array.from(new Set([...(model.input ?? []), "image"])),
   };
 }
 
@@ -472,7 +472,7 @@ export async function scanOpenRouterModels(
       };
 
       const toolResult = await probeTool(model, apiKey, timeoutMs);
-      const imageResult = model.input.includes("image")
+      const imageResult = model.input?.includes("image")
         ? await probeImage(ensureImageInput(model), apiKey, timeoutMs)
         : { ok: false, latencyMs: null, skipped: true };
 
diff --git a/src/agents/pi-embedded-runner/model.test.ts b/src/agents/pi-embedded-runner/model.test.ts
index 105f929b9..5789dfaad 100644
--- a/src/agents/pi-embedded-runner/model.test.ts
+++ b/src/agents/pi-embedded-runner/model.test.ts
@@ -202,6 +202,42 @@ describe("buildInlineProviderModels", () => {
 });
 
 describe("resolveModel", () => {
+  it("defaults model input to text when discovery omits input", () => {
+    mockDiscoveredModel({
+      provider: "custom",
+      modelId: "missing-input",
+      templateModel: {
+        id: "missing-input",
+        name: "missing-input",
+        api: "openai-completions",
+        provider: "custom",
+        baseUrl: "http://localhost:9999",
+        reasoning: false,
+        // NOTE: deliberately omit input to simulate buggy/custom catalogs.
+        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+        contextWindow: 8192,
+        maxTokens: 1024,
+      },
+    });
+
+    const result = resolveModel("custom", "missing-input", "/tmp/agent", {
+      models: {
+        providers: {
+          custom: {
+            baseUrl: "http://localhost:9999",
+            api: "openai-completions",
+            // Intentionally keep this minimal — the discovered model provides the rest.
+            models: [{ id: "missing-input", name: "missing-input" }],
+          },
+        },
+      },
+    } as unknown as OpenClawConfig);
+
+    expect(result.error).toBeUndefined();
+    expect(Array.isArray(result.model?.input)).toBe(true);
+    expect(result.model?.input).toEqual(["text"]);
+  });
+
   it("includes provider baseUrl in fallback model", () => {
     const cfg = {
       models: {
diff --git a/src/agents/pi-embedded-runner/model.ts b/src/agents/pi-embedded-runner/model.ts
index 6f2852203..eb9fa675b 100644
--- a/src/agents/pi-embedded-runner/model.ts
+++ b/src/agents/pi-embedded-runner/model.ts
@@ -93,12 +93,18 @@ function applyConfiguredProviderOverrides(params: {
       headers: discoveredHeaders,
     };
   }
+  const resolvedInput = configuredModel?.input ?? discoveredModel.input;
+  const normalizedInput =
+    Array.isArray(resolvedInput) && resolvedInput.length > 0
+      ? resolvedInput.filter((item) => item === "text" || item === "image")
+      : (["text"] as Array<"text" | "image">);
+
   return {
     ...discoveredModel,
     api: configuredModel?.api ?? providerConfig.api ?? discoveredModel.api,
     baseUrl: providerConfig.baseUrl ?? discoveredModel.baseUrl,
     reasoning: configuredModel?.reasoning ?? discoveredModel.reasoning,
-    input: configuredModel?.input ?? discoveredModel.input,
+    input: normalizedInput,
     cost: configuredModel?.cost ?? discoveredModel.cost,
     contextWindow: configuredModel?.contextWindow ?? discoveredModel.contextWindow,
     maxTokens: configuredModel?.maxTokens ?? discoveredModel.maxTokens,