fix(agents): validate tool-result MEDIA directives with shared parser

Co-authored-by: Ho Lim <166576253+HOYALIM@users.noreply.github.com>
2026-02-22 19:51:38 +01:00
parent 0c1f491a02
commit c3d11d56c3
4 changed files with 50 additions and 20 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -59,6 +59,7 @@ Docs: https://docs.openclaw.ai
 - Telegram/Polling: clear Telegram webhooks (`deleteWebhook`) before starting long-poll `getUpdates`, including retry handling for transient cleanup failures.
 - Telegram/Webhook: add `channels.telegram.webhookPort` config support and pass it through plugin startup wiring to the monitor listener.
 - Telegram/Media: send a user-facing Telegram reply when media download fails (non-size errors) instead of silently dropping the message.
+- Agents/Media: route tool-result `MEDIA:` extraction through shared parser validation so malformed prose like `MEDIA:-prefixed ...` is no longer treated as a local file path (prevents Telegram ENOENT tool-error overrides). (#18780) Thanks @HOYALIM.
 - Logging: cap single log-file size with `logging.maxFileBytes` (default 500 MB) and suppress additional writes after cap hit to prevent disk exhaustion from repeated error storms.
 - Memory/Remote HTTP: centralize remote memory HTTP calls behind a shared guarded helper (`withRemoteHttpResponse`) so embeddings and batch flows use one request/release path.
 - Memory/Embeddings: apply configured remote-base host pinning (`allowedHostnames`) across OpenAI/Voyage/Gemini embedding requests to keep private/self-hosted endpoints working without cross-host drift. (#18198) Thanks @ianpcook.
--- a/src/agents/pi-embedded-subscribe.tools.media.test.ts
+++ b/src/agents/pi-embedded-subscribe.tools.media.test.ts
@@ -175,6 +175,18 @@ describe("extractToolResultMediaPaths", () => {
    expect(extractToolResultMediaPaths(result)).toEqual([]);
  });

+  it("does not treat malformed MEDIA:-prefixed prose as a file path", () => {
+    const result = {
+      content: [
+        {
+          type: "text",
+          text: "MEDIA:-prefixed paths (lenient whitespace) when loading outbound media",
+        },
+      ],
+    };
+    expect(extractToolResultMediaPaths(result)).toEqual([]);
+  });
+
  it("still extracts MEDIA: at line start after other text lines", () => {
    const result = {
      content: [
--- a/src/agents/pi-embedded-subscribe.tools.ts
+++ b/src/agents/pi-embedded-subscribe.tools.ts
@@ -1,6 +1,6 @@
 import { getChannelPlugin, normalizeChannelId } from "../channels/plugins/index.js";
 import { normalizeTargetForProvider } from "../infra/outbound/target-normalization.js";
-import { MEDIA_TOKEN_RE } from "../media/parse.js";
+import { splitMediaFromOutput } from "../media/parse.js";
 import { truncateUtf16Safe } from "../utils.js";
 import { collectTextContentBlocks } from "./content-blocks.js";
 import { type MessagingToolSend } from "./pi-embedded-messaging.js";
@@ -203,7 +203,8 @@ export function extractToolResultMediaPaths(result: unknown): string[] {
    return [];
  }

-  // Extract MEDIA: paths from text content blocks.
+  // Extract MEDIA: paths from text content blocks using the shared parser so
+  // directive matching and validation stay in sync with outbound reply parsing.
  const paths: string[] = [];
  let hasImageContent = false;
  for (const item of content) {
@@ -216,24 +217,9 @@ export function extractToolResultMediaPaths(result: unknown): string[] {
      continue;
    }
    if (entry.type === "text" && typeof entry.text === "string") {
-      // Only parse lines that start with MEDIA: (after trimming) to avoid
-      // false-matching placeholders like <media:audio> or mid-line mentions.
-      // Mirrors the line-start guard in splitMediaFromOutput (media/parse.ts).
-      for (const line of entry.text.split("\n")) {
-        if (!line.trimStart().startsWith("MEDIA:")) {
-          continue;
-        }
-        MEDIA_TOKEN_RE.lastIndex = 0;
-        let match: RegExpExecArray | null;
-        while ((match = MEDIA_TOKEN_RE.exec(line)) !== null) {
-          const p = match[1]
-            ?.replace(/^[`"'[{(]+/, "")
-            .replace(/[`"'\]})\\,]+$/, "")
-            .trim();
-          if (p && p.length <= 4096) {
-            paths.push(p);
-          }
-        }
+      const parsed = splitMediaFromOutput(entry.text);
+      if (parsed.mediaUrls?.length) {
+        paths.push(...parsed.mediaUrls);
      }
    }
  }
--- a/src/telegram/bot-message-dispatch.test.ts
+++ b/src/telegram/bot-message-dispatch.test.ts
@@ -404,6 +404,37 @@ describe("dispatchTelegramMessage draft streaming", () => {
    expect(draftStream.stop).toHaveBeenCalled();
  });

+  it("keeps streamed preview visible when final text regresses after a tool warning", async () => {
+    const draftStream = createDraftStream(999);
+    createTelegramDraftStream.mockReturnValue(draftStream);
+    dispatchReplyWithBufferedBlockDispatcher.mockImplementation(
+      async ({ dispatcherOptions, replyOptions }) => {
+        await replyOptions?.onPartialReply?.({ text: "Recovered final answer." });
+        await dispatcherOptions.deliver(
+          { text: "⚠️ Recovered tool error details", isError: true },
+          { kind: "tool" },
+        );
+        await dispatcherOptions.deliver({ text: "Recovered final answer" }, { kind: "final" });
+        return { queuedFinal: true };
+      },
+    );
+    deliverReplies.mockResolvedValue({ delivered: true });
+
+    await dispatchWithContext({ context: createContext(), streamMode: "partial" });
+
+    // Regressive final ("answer." -> "answer") should keep the preview instead
+    // of clearing it and leaving only the tool warning visible.
+    expect(editMessageTelegram).not.toHaveBeenCalled();
+    expect(deliverReplies).toHaveBeenCalledTimes(1);
+    expect(deliverReplies).toHaveBeenCalledWith(
+      expect.objectContaining({
+        replies: [expect.objectContaining({ text: "⚠️ Recovered tool error details" })],
+      }),
+    );
+    expect(draftStream.clear).not.toHaveBeenCalled();
+    expect(draftStream.stop).toHaveBeenCalled();
+  });
+
  it("falls back to normal delivery when preview final is too long to edit", async () => {
    const draftStream = createDraftStream(999);
    createTelegramDraftStream.mockReturnValue(draftStream);