fix(agents): validate tool-result MEDIA directives with shared parser

Co-authored-by: Ho Lim <166576253+HOYALIM@users.noreply.github.com>
This commit is contained in:
Peter Steinberger
2026-02-22 19:51:38 +01:00
parent 0c1f491a02
commit c3d11d56c3
4 changed files with 50 additions and 20 deletions

View File

@@ -59,6 +59,7 @@ Docs: https://docs.openclaw.ai
- Telegram/Polling: clear Telegram webhooks (`deleteWebhook`) before starting long-poll `getUpdates`, including retry handling for transient cleanup failures.
- Telegram/Webhook: add `channels.telegram.webhookPort` config support and pass it through plugin startup wiring to the monitor listener.
- Telegram/Media: send a user-facing Telegram reply when media download fails (non-size errors) instead of silently dropping the message.
- Agents/Media: route tool-result `MEDIA:` extraction through shared parser validation so malformed prose like `MEDIA:-prefixed ...` is no longer treated as a local file path (prevents Telegram ENOENT tool-error overrides). (#18780) Thanks @HOYALIM.
- Logging: cap single log-file size with `logging.maxFileBytes` (default 500 MB) and suppress additional writes once the cap is hit to prevent disk exhaustion from repeated error storms.
- Memory/Remote HTTP: centralize remote memory HTTP calls behind a shared guarded helper (`withRemoteHttpResponse`) so embeddings and batch flows use one request/release path.
- Memory/Embeddings: apply configured remote-base host pinning (`allowedHostnames`) across OpenAI/Voyage/Gemini embedding requests to keep private/self-hosted endpoints working without cross-host drift. (#18198) Thanks @ianpcook.

View File

@@ -175,6 +175,18 @@ describe("extractToolResultMediaPaths", () => {
expect(extractToolResultMediaPaths(result)).toEqual([]);
});
// Regression guard: a sentence that merely begins with "MEDIA:" but is
// ordinary prose must not produce any extracted media paths.
it("does not treat malformed MEDIA:-prefixed prose as a file path", () => {
  const proseText = "MEDIA:-prefixed paths (lenient whitespace) when loading outbound media";
  const toolResult = {
    content: [{ type: "text", text: proseText }],
  };

  const extracted = extractToolResultMediaPaths(toolResult);

  expect(extracted).toEqual([]);
});
it("still extracts MEDIA: at line start after other text lines", () => {
const result = {
content: [

View File

@@ -1,6 +1,6 @@
import { getChannelPlugin, normalizeChannelId } from "../channels/plugins/index.js";
import { normalizeTargetForProvider } from "../infra/outbound/target-normalization.js";
import { MEDIA_TOKEN_RE } from "../media/parse.js";
import { splitMediaFromOutput } from "../media/parse.js";
import { truncateUtf16Safe } from "../utils.js";
import { collectTextContentBlocks } from "./content-blocks.js";
import { type MessagingToolSend } from "./pi-embedded-messaging.js";
@@ -203,7 +203,8 @@ export function extractToolResultMediaPaths(result: unknown): string[] {
return [];
}
// Extract MEDIA: paths from text content blocks.
// Extract MEDIA: paths from text content blocks using the shared parser so
// directive matching and validation stay in sync with outbound reply parsing.
const paths: string[] = [];
let hasImageContent = false;
for (const item of content) {
@@ -216,24 +217,9 @@ export function extractToolResultMediaPaths(result: unknown): string[] {
continue;
}
if (entry.type === "text" && typeof entry.text === "string") {
// Only parse lines that start with MEDIA: (after trimming) to avoid
// false-matching placeholders like <media:audio> or mid-line mentions.
// Mirrors the line-start guard in splitMediaFromOutput (media/parse.ts).
for (const line of entry.text.split("\n")) {
if (!line.trimStart().startsWith("MEDIA:")) {
continue;
}
MEDIA_TOKEN_RE.lastIndex = 0;
let match: RegExpExecArray | null;
while ((match = MEDIA_TOKEN_RE.exec(line)) !== null) {
const p = match[1]
?.replace(/^[`"'[{(]+/, "")
.replace(/[`"'\]})\\,]+$/, "")
.trim();
if (p && p.length <= 4096) {
paths.push(p);
}
}
const parsed = splitMediaFromOutput(entry.text);
if (parsed.mediaUrls?.length) {
paths.push(...parsed.mediaUrls);
}
}
}

View File

@@ -404,6 +404,37 @@ describe("dispatchTelegramMessage draft streaming", () => {
expect(draftStream.stop).toHaveBeenCalled();
});
// Scenario: a partial reply streams "Recovered final answer.", then an error
// payload is delivered with kind "tool", then a final payload arrives whose
// text is the streamed text minus its trailing period.
it("keeps streamed preview visible when final text regresses after a tool warning", async () => {
const draftStream = createDraftStream(999);
createTelegramDraftStream.mockReturnValue(draftStream);
// The mocked dispatcher drives the three deliveries in a fixed order:
// partial -> tool error -> final.
dispatchReplyWithBufferedBlockDispatcher.mockImplementation(
async ({ dispatcherOptions, replyOptions }) => {
await replyOptions?.onPartialReply?.({ text: "Recovered final answer." });
await dispatcherOptions.deliver(
{ text: "⚠️ Recovered tool error details", isError: true },
{ kind: "tool" },
);
await dispatcherOptions.deliver({ text: "Recovered final answer" }, { kind: "final" });
return { queuedFinal: true };
},
);
deliverReplies.mockResolvedValue({ delivered: true });
await dispatchWithContext({ context: createContext(), streamMode: "partial" });
// Regressive final ("answer." -> "answer") should keep the preview instead
// of clearing it and leaving only the tool warning visible.
expect(editMessageTelegram).not.toHaveBeenCalled();
// Exactly one deliverReplies call is expected, and it carries the tool warning.
expect(deliverReplies).toHaveBeenCalledTimes(1);
expect(deliverReplies).toHaveBeenCalledWith(
expect.objectContaining({
replies: [expect.objectContaining({ text: "⚠️ Recovered tool error details" })],
}),
);
// The draft stream must be stopped but never cleared.
expect(draftStream.clear).not.toHaveBeenCalled();
expect(draftStream.stop).toHaveBeenCalled();
});
it("falls back to normal delivery when preview final is too long to edit", async () => {
const draftStream = createDraftStream(999);
createTelegramDraftStream.mockReturnValue(draftStream);