fix(outbound): harden plain-text HTML sanitization paths (#32034)

This commit is contained in:
Peter Steinberger
2026-03-02 19:27:50 +00:00
parent 62d0cfeee7
commit e1bc5cad25
4 changed files with 82 additions and 7 deletions

View File

@@ -221,6 +221,30 @@ describe("deliverOutboundPayloads", () => {
);
});
it("preserves HTML text for telegram sendPayload channelData path", async () => {
  // A Telegram payload that carries channelData must reach the sender with its
  // markup untouched, and the send options must request HTML text mode.
  const recipient = "123";
  const htmlText = "<b>hello</b>";
  const sendTelegram = vi.fn().mockResolvedValue({ messageId: "m1", chatId: "c1" });

  await deliverOutboundPayloads({
    cfg: telegramChunkConfig,
    channel: "telegram",
    to: recipient,
    payloads: [{ text: htmlText, channelData: { telegram: { buttons: [] } } }],
    deps: { sendTelegram },
  });

  // Exactly one send, with the original markup as the message body.
  expect(sendTelegram).toHaveBeenCalledTimes(1);
  const htmlModeOptions = expect.objectContaining({ textMode: "html" });
  expect(sendTelegram).toHaveBeenCalledWith(recipient, htmlText, htmlModeOptions);
});
it("scopes media local roots to the active agent workspace when agentId is provided", async () => {
const sendTelegram = vi.fn().mockResolvedValue({ messageId: "m1", chatId: "c1" });
@@ -442,6 +466,17 @@ describe("deliverOutboundPayloads", () => {
expect(results).toEqual([]);
});
it("drops HTML-only WhatsApp text payloads after sanitization", async () => {
  // Text made up solely of <br> tags should produce no WhatsApp send and an
  // empty result list.
  const htmlOnlyPayload = { text: "<br><br>" };
  const sendWhatsApp = vi.fn().mockResolvedValue({ messageId: "w1", toJid: "jid" });

  const results = await deliverWhatsAppPayload({ sendWhatsApp, payload: htmlOnlyPayload });

  expect(sendWhatsApp).not.toHaveBeenCalled();
  expect(results).toEqual([]);
});
it("keeps WhatsApp media payloads but clears whitespace-only captions", async () => {
const sendWhatsApp = vi.fn().mockResolvedValue({ messageId: "w1", toJid: "jid" });
await deliverWhatsAppPayload({
@@ -461,6 +496,20 @@ describe("deliverOutboundPayloads", () => {
);
});
it("drops non-WhatsApp HTML-only text payloads after sanitization", async () => {
  // Same expectation on the Signal channel: a lone <br> should result in no
  // send call and no delivery results.
  const sendSignal = vi.fn().mockResolvedValue({ messageId: "s1", toJid: "jid" });
  const deliveryArgs = {
    cfg: {},
    channel: "signal" as const,
    to: "+1555",
    payloads: [{ text: "<br>" }],
    deps: { sendSignal },
  };

  const deliveryResults = await deliverOutboundPayloads(deliveryArgs);

  expect(sendSignal).not.toHaveBeenCalled();
  expect(deliveryResults).toEqual([]);
});
it("preserves fenced blocks for markdown chunkers in newline mode", async () => {
const chunker = vi.fn((text: string) => (text ? [text] : []));
const sendText = vi.fn().mockImplementation(async ({ text }: { text: string }) => ({

View File

@@ -446,14 +446,21 @@ async function deliverOutboundPayloadsCore(
text: normalizedText,
};
};
const normalizedPayloads = normalizeReplyPayloadsForDelivery(payloads)
.flatMap((payload) => {
if (channel !== "whatsapp") {
return [payload];
const normalizeEmptyTextPayload = (payload: ReplyPayload): ReplyPayload | null => {
const hasMedia = Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0;
const rawText = typeof payload.text === "string" ? payload.text : "";
if (!rawText.trim()) {
if (!hasMedia) {
return null;
}
const normalized = normalizeWhatsAppPayload(payload);
return normalized ? [normalized] : [];
})
return {
...payload,
text: "",
};
}
return payload;
};
const normalizedPayloads = normalizeReplyPayloadsForDelivery(payloads)
.map((payload) => {
// Strip HTML tags for plain-text surfaces (WhatsApp, Signal, etc.)
// Models occasionally produce <br>, <b>, etc. that render as literal text.
@@ -461,7 +468,18 @@ async function deliverOutboundPayloadsCore(
if (!isPlainTextSurface(channel) || !payload.text) {
return payload;
}
// Telegram sendPayload uses textMode:"html". Preserve raw HTML in this path.
if (channel === "telegram" && payload.channelData) {
return payload;
}
return { ...payload, text: sanitizeForPlainText(payload.text) };
})
.flatMap((payload) => {
const normalized =
channel === "whatsapp"
? normalizeWhatsAppPayload(payload)
: normalizeEmptyTextPayload(payload);
return normalized ? [normalized] : [];
});
const hookRunner = getGlobalHookRunner();
const sessionKeyForInternalHooks = params.mirror?.sessionKey ?? params.session?.key;

View File

@@ -85,6 +85,12 @@ describe("sanitizeForPlainText", () => {
expect(sanitizeForPlainText('<a href="https://example.com">link</a>')).toBe("link");
});
it("preserves angle-bracket autolinks", () => {
  // <https://...> is an autolink rather than an HTML tag: the angle brackets
  // are removed and the URL itself is kept.
  const input = "See <https://example.com/path?q=1> now";
  const expected = "See https://example.com/path?q=1 now";

  expect(sanitizeForPlainText(input)).toBe(expected);
});
// --- passthrough --------------------------------------------------------
it("passes through clean text unchanged", () => {

View File

@@ -38,6 +38,8 @@ export function isPlainTextSurface(channelId: string): boolean {
export function sanitizeForPlainText(text: string): string {
return (
text
// Preserve angle-bracket autolinks as plain URLs before tag stripping.
.replace(/<((?:https?:\/\/|mailto:)[^<>\s]+)>/gi, "$1")
// Line breaks
.replace(/<br\s*\/?>/gi, "\n")
// Block elements → newlines