From e1bc5cad25b0906945efec52fb81314b590caec2 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 2 Mar 2026 19:27:50 +0000 Subject: [PATCH] fix(outbound): harden plain-text HTML sanitization paths (#32034) --- src/infra/outbound/deliver.test.ts | 49 ++++++++++++++++++++++++ src/infra/outbound/deliver.ts | 32 ++++++++++++---- src/infra/outbound/sanitize-text.test.ts | 6 +++ src/infra/outbound/sanitize-text.ts | 2 + 4 files changed, 82 insertions(+), 7 deletions(-) diff --git a/src/infra/outbound/deliver.test.ts b/src/infra/outbound/deliver.test.ts index 71acf883b..79b1ba746 100644 --- a/src/infra/outbound/deliver.test.ts +++ b/src/infra/outbound/deliver.test.ts @@ -221,6 +221,30 @@ describe("deliverOutboundPayloads", () => { ); }); + it("preserves HTML text for telegram sendPayload channelData path", async () => { + const sendTelegram = vi.fn().mockResolvedValue({ messageId: "m1", chatId: "c1" }); + + await deliverOutboundPayloads({ + cfg: telegramChunkConfig, + channel: "telegram", + to: "123", + payloads: [ + { + text: "hello", + channelData: { telegram: { buttons: [] } }, + }, + ], + deps: { sendTelegram }, + }); + + expect(sendTelegram).toHaveBeenCalledTimes(1); + expect(sendTelegram).toHaveBeenCalledWith( + "123", + "hello", + expect.objectContaining({ textMode: "html" }), + ); + }); + it("scopes media local roots to the active agent workspace when agentId is provided", async () => { const sendTelegram = vi.fn().mockResolvedValue({ messageId: "m1", chatId: "c1" }); @@ -442,6 +466,17 @@ describe("deliverOutboundPayloads", () => { expect(results).toEqual([]); }); + it("drops HTML-only WhatsApp text payloads after sanitization", async () => { + const sendWhatsApp = vi.fn().mockResolvedValue({ messageId: "w1", toJid: "jid" }); + const results = await deliverWhatsAppPayload({ + sendWhatsApp, + payload: { text: "<br><br>" }, + }); + + expect(sendWhatsApp).not.toHaveBeenCalled(); + expect(results).toEqual([]); + }); + it("keeps WhatsApp media payloads but clears whitespace-only captions", async () => { const sendWhatsApp = vi.fn().mockResolvedValue({ messageId: "w1", toJid: "jid" }); await deliverWhatsAppPayload({ @@ -461,6 +496,20 @@ describe("deliverOutboundPayloads", () => { ); }); + it("drops non-WhatsApp HTML-only text payloads after sanitization", async () => { + const sendSignal = vi.fn().mockResolvedValue({ messageId: "s1", toJid: "jid" }); + const results = await deliverOutboundPayloads({ + cfg: {}, + channel: "signal", + to: "+1555", + payloads: [{ text: "<br>" }], + deps: { sendSignal }, + }); + + expect(sendSignal).not.toHaveBeenCalled(); + expect(results).toEqual([]); + }); + it("preserves fenced blocks for markdown chunkers in newline mode", async () => { const chunker = vi.fn((text: string) => (text ? [text] : [])); const sendText = vi.fn().mockImplementation(async ({ text }: { text: string }) => ({ diff --git a/src/infra/outbound/deliver.ts b/src/infra/outbound/deliver.ts index 1fd5f3de7..c5b986bae 100644 --- a/src/infra/outbound/deliver.ts +++ b/src/infra/outbound/deliver.ts @@ -446,14 +446,21 @@ async function deliverOutboundPayloadsCore( text: normalizedText, }; }; - const normalizedPayloads = normalizeReplyPayloadsForDelivery(payloads) - .flatMap((payload) => { - if (channel !== "whatsapp") { - return [payload]; + const normalizeEmptyTextPayload = (payload: ReplyPayload): ReplyPayload | null => { + const hasMedia = Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0; + const rawText = typeof payload.text === "string" ? payload.text : ""; + if (!rawText.trim()) { + if (!hasMedia) { + return null; } - const normalized = normalizeWhatsAppPayload(payload); - return normalized ? [normalized] : []; - }) + return { + ...payload, + text: "", + }; + } + return payload; + }; + const normalizedPayloads = normalizeReplyPayloadsForDelivery(payloads) .map((payload) => { // Strip HTML tags for plain-text surfaces (WhatsApp, Signal, etc.) // Models occasionally produce <br>, <b>, etc. that render as literal text. @@ -461,7 +468,18 @@ async function deliverOutboundPayloadsCore( if (!isPlainTextSurface(channel) || !payload.text) { return payload; } + // Telegram sendPayload uses textMode:"html". Preserve raw HTML in this path. + if (channel === "telegram" && payload.channelData) { + return payload; + } return { ...payload, text: sanitizeForPlainText(payload.text) }; + }) + .flatMap((payload) => { + const normalized = + channel === "whatsapp" + ? normalizeWhatsAppPayload(payload) + : normalizeEmptyTextPayload(payload); + return normalized ? [normalized] : []; }); const hookRunner = getGlobalHookRunner(); const sessionKeyForInternalHooks = params.mirror?.sessionKey ?? params.session?.key; diff --git a/src/infra/outbound/sanitize-text.test.ts b/src/infra/outbound/sanitize-text.test.ts index c7aebb6db..b22b45df2 100644 --- a/src/infra/outbound/sanitize-text.test.ts +++ b/src/infra/outbound/sanitize-text.test.ts @@ -85,6 +85,12 @@ describe("sanitizeForPlainText", () => { expect(sanitizeForPlainText('link')).toBe("link"); }); + it("preserves angle-bracket autolinks", () => { + expect(sanitizeForPlainText("See <https://example.com/path?q=1> now")).toBe( + "See https://example.com/path?q=1 now", + ); + }); + // --- passthrough -------------------------------------------------------- it("passes through clean text unchanged", () => { diff --git a/src/infra/outbound/sanitize-text.ts b/src/infra/outbound/sanitize-text.ts index fb6b6abbb..84adfda3a 100644 --- a/src/infra/outbound/sanitize-text.ts +++ b/src/infra/outbound/sanitize-text.ts @@ -38,6 +38,8 @@ export function isPlainTextSurface(channelId: string): boolean { export function sanitizeForPlainText(text: string): string { return ( text + // Preserve angle-bracket autolinks as plain URLs before tag stripping. + .replace(/<((?:https?:\/\/|mailto:)[^<>\s]+)>/gi, "$1") // Line breaks .replace(/<br\s*\/?>/gi, "\n") // Block elements → newlines