fix(agents): validate tool-result MEDIA directives with shared parser
Co-authored-by: Ho Lim <166576253+HOYALIM@users.noreply.github.com>
This commit is contained in:
@@ -59,6 +59,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Telegram/Polling: clear Telegram webhooks (`deleteWebhook`) before starting long-poll `getUpdates`, including retry handling for transient cleanup failures.
|
||||
- Telegram/Webhook: add `channels.telegram.webhookPort` config support and pass it through plugin startup wiring to the monitor listener.
|
||||
- Telegram/Media: send a user-facing Telegram reply when media download fails (non-size errors) instead of silently dropping the message.
|
||||
- Agents/Media: route tool-result `MEDIA:` extraction through shared parser validation so malformed prose like `MEDIA:-prefixed ...` is no longer treated as a local file path (prevents Telegram ENOENT tool-error overrides). (#18780) Thanks @HOYALIM.
|
||||
- Logging: cap single log-file size with `logging.maxFileBytes` (default 500 MB) and suppress additional writes after the cap is hit to prevent disk exhaustion from repeated error storms.
|
||||
- Memory/Remote HTTP: centralize remote memory HTTP calls behind a shared guarded helper (`withRemoteHttpResponse`) so embeddings and batch flows use one request/release path.
|
||||
- Memory/Embeddings: apply configured remote-base host pinning (`allowedHostnames`) across OpenAI/Voyage/Gemini embedding requests to keep private/self-hosted endpoints working without cross-host drift. (#18198) Thanks @ianpcook.
|
||||
|
||||
@@ -175,6 +175,18 @@ describe("extractToolResultMediaPaths", () => {
|
||||
expect(extractToolResultMediaPaths(result)).toEqual([]);
|
||||
});
|
||||
|
||||
it("does not treat malformed MEDIA:-prefixed prose as a file path", () => {
|
||||
const result = {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: "MEDIA:-prefixed paths (lenient whitespace) when loading outbound media",
|
||||
},
|
||||
],
|
||||
};
|
||||
expect(extractToolResultMediaPaths(result)).toEqual([]);
|
||||
});
|
||||
|
||||
it("still extracts MEDIA: at line start after other text lines", () => {
|
||||
const result = {
|
||||
content: [
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { getChannelPlugin, normalizeChannelId } from "../channels/plugins/index.js";
|
||||
import { normalizeTargetForProvider } from "../infra/outbound/target-normalization.js";
|
||||
import { MEDIA_TOKEN_RE } from "../media/parse.js";
|
||||
import { splitMediaFromOutput } from "../media/parse.js";
|
||||
import { truncateUtf16Safe } from "../utils.js";
|
||||
import { collectTextContentBlocks } from "./content-blocks.js";
|
||||
import { type MessagingToolSend } from "./pi-embedded-messaging.js";
|
||||
@@ -203,7 +203,8 @@ export function extractToolResultMediaPaths(result: unknown): string[] {
|
||||
return [];
|
||||
}
|
||||
|
||||
// Extract MEDIA: paths from text content blocks.
|
||||
// Extract MEDIA: paths from text content blocks using the shared parser so
|
||||
// directive matching and validation stay in sync with outbound reply parsing.
|
||||
const paths: string[] = [];
|
||||
let hasImageContent = false;
|
||||
for (const item of content) {
|
||||
@@ -216,24 +217,9 @@ export function extractToolResultMediaPaths(result: unknown): string[] {
|
||||
continue;
|
||||
}
|
||||
if (entry.type === "text" && typeof entry.text === "string") {
|
||||
// Only parse lines that start with MEDIA: (after trimming) to avoid
|
||||
// false-matching placeholders like <media:audio> or mid-line mentions.
|
||||
// Mirrors the line-start guard in splitMediaFromOutput (media/parse.ts).
|
||||
for (const line of entry.text.split("\n")) {
|
||||
if (!line.trimStart().startsWith("MEDIA:")) {
|
||||
continue;
|
||||
}
|
||||
MEDIA_TOKEN_RE.lastIndex = 0;
|
||||
let match: RegExpExecArray | null;
|
||||
while ((match = MEDIA_TOKEN_RE.exec(line)) !== null) {
|
||||
const p = match[1]
|
||||
?.replace(/^[`"'[{(]+/, "")
|
||||
.replace(/[`"'\]})\\,]+$/, "")
|
||||
.trim();
|
||||
if (p && p.length <= 4096) {
|
||||
paths.push(p);
|
||||
}
|
||||
}
|
||||
const parsed = splitMediaFromOutput(entry.text);
|
||||
if (parsed.mediaUrls?.length) {
|
||||
paths.push(...parsed.mediaUrls);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -404,6 +404,37 @@ describe("dispatchTelegramMessage draft streaming", () => {
|
||||
expect(draftStream.stop).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("keeps streamed preview visible when final text regresses after a tool warning", async () => {
|
||||
const draftStream = createDraftStream(999);
|
||||
createTelegramDraftStream.mockReturnValue(draftStream);
|
||||
dispatchReplyWithBufferedBlockDispatcher.mockImplementation(
|
||||
async ({ dispatcherOptions, replyOptions }) => {
|
||||
await replyOptions?.onPartialReply?.({ text: "Recovered final answer." });
|
||||
await dispatcherOptions.deliver(
|
||||
{ text: "⚠️ Recovered tool error details", isError: true },
|
||||
{ kind: "tool" },
|
||||
);
|
||||
await dispatcherOptions.deliver({ text: "Recovered final answer" }, { kind: "final" });
|
||||
return { queuedFinal: true };
|
||||
},
|
||||
);
|
||||
deliverReplies.mockResolvedValue({ delivered: true });
|
||||
|
||||
await dispatchWithContext({ context: createContext(), streamMode: "partial" });
|
||||
|
||||
// Regressive final ("answer." -> "answer") should keep the preview instead
|
||||
// of clearing it and leaving only the tool warning visible.
|
||||
expect(editMessageTelegram).not.toHaveBeenCalled();
|
||||
expect(deliverReplies).toHaveBeenCalledTimes(1);
|
||||
expect(deliverReplies).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
replies: [expect.objectContaining({ text: "⚠️ Recovered tool error details" })],
|
||||
}),
|
||||
);
|
||||
expect(draftStream.clear).not.toHaveBeenCalled();
|
||||
expect(draftStream.stop).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("falls back to normal delivery when preview final is too long to edit", async () => {
|
||||
const draftStream = createDraftStream(999);
|
||||
createTelegramDraftStream.mockReturnValue(draftStream);
|
||||
|
||||
Reference in New Issue
Block a user