// Shared helpers for parsing MEDIA tokens from command/stdout text.

import { parseFenceSpans } from "../markdown/fences.js";
import { parseAudioTag } from "./audio-tags.js";

/**
 * Matches a `MEDIA:` token and captures the rest of the line as its payload.
 *
 * An optional opening backtick is skipped. Because the capture group is greedy,
 * a closing backtick or trailing punctuation ends up inside the capture and is
 * removed later by `cleanCandidate`.
 */
export const MEDIA_TOKEN_RE = /\bMEDIA:\s*`?([^\n]+)`?/gi;

/**
 * Strips a leading `file://` prefix from a media source.
 *
 * @param src - Raw media source (URL or path).
 * @returns `src` without the first `file://` when it starts with that prefix;
 *   otherwise `src` unchanged.
 */
export function normalizeMediaSource(src: string): string {
  return src.startsWith("file://") ? src.replace("file://", "") : src;
}

/** Removes wrapping quote/bracket characters and trailing punctuation from a raw token. */
function cleanCandidate(raw: string): string {
  return raw.replace(/^[`"'[{(]+/, "").replace(/[`"'\\})\],]+$/, "");
}

const WINDOWS_DRIVE_RE = /^[a-zA-Z]:[\\/]/;
const SCHEME_RE = /^[a-zA-Z][a-zA-Z0-9+.-]*:/;
const HAS_FILE_EXT = /\.\w{1,10}$/;

// Recognize local file path patterns. Security validation is deferred to the
// load layer (loadWebMedia / resolveSandboxedMediaSource) which has the context
// needed to enforce sandbox roots and allowed directories.
function isLikelyLocalPath(candidate: string): boolean {
  return (
    candidate.startsWith("/") ||
    candidate.startsWith("./") ||
    candidate.startsWith("../") ||
    candidate.startsWith("~") ||
    WINDOWS_DRIVE_RE.test(candidate) ||
    candidate.startsWith("\\\\") ||
    // Scheme-less strings that contain a path separator are treated as relative paths.
    (!SCHEME_RE.test(candidate) && (candidate.includes("/") || candidate.includes("\\")))
  );
}

/**
 * Decides whether a cleaned candidate should be accepted as a media source.
 *
 * @param candidate - Candidate after `cleanCandidate` / `normalizeMediaSource`.
 * @param opts - `allowSpaces` permits whitespace inside the candidate;
 *   `allowBareFilename` accepts scheme-less names that end in a file extension.
 * @returns `true` for http(s) URLs, likely local paths and (when opted in)
 *   bare filenames; `false` for empty, over-long (> 4096 chars) or otherwise
 *   unrecognized candidates.
 */
function isValidMedia(
  candidate: string,
  opts?: { allowSpaces?: boolean; allowBareFilename?: boolean },
): boolean {
  if (!candidate) {
    return false;
  }
  if (candidate.length > 4096) {
    return false;
  }
  if (!opts?.allowSpaces && /\s/.test(candidate)) {
    return false;
  }
  if (/^https?:\/\//i.test(candidate)) {
    return true;
  }
  if (isLikelyLocalPath(candidate)) {
    return true;
  }
  // Accept bare filenames (e.g. "image.png") only when the caller opts in.
  // This avoids treating space-split path fragments as separate media items.
  if (opts?.allowBareFilename && !SCHEME_RE.test(candidate) && HAS_FILE_EXT.test(candidate)) {
    return true;
  }
  return false;
}

/**
 * Returns the trimmed inner text when `value` is wrapped in a matching pair of
 * `"`, `'` or backtick characters; otherwise `undefined`.
 */
function unwrapQuoted(value: string): string | undefined {
  const trimmed = value.trim();
  if (trimmed.length < 2) {
    return undefined;
  }
  const first = trimmed[0];
  const last = trimmed[trimmed.length - 1];
  if (first !== last) {
    return undefined;
  }
  if (first !== `"` && first !== "'" && first !== "`") {
    return undefined;
  }
  return trimmed.slice(1, -1).trim();
}

// Check if a character offset is inside any fenced code block
function isInsideFence(fenceSpans: Array<{ start: number; end: number }>, offset: number): boolean {
  return fenceSpans.some((span) => offset >= span.start && offset < span.end);
}

/**
 * Splits `MEDIA:` tokens out of command/stdout text.
 *
 * Only lines whose first non-whitespace characters are `MEDIA:` and whose
 * starting offset lies outside every fenced code block are scanned. Accepted
 * media sources are collected in order; the token text is removed from the
 * returned text, and lines that become empty are dropped. An audio tag detected
 * by `parseAudioTag` is stripped from the text and reported via `audioAsVoice`.
 *
 * @param raw - Raw output text.
 * @returns `text` is the cleaned text when a media token or audio tag was
 *   found, otherwise the end-trimmed input (or `""` for blank input).
 *   `mediaUrls` / `mediaUrl` are present only when at least one source was
 *   extracted; `audioAsVoice` is present only when the audio tag was found.
 */
export function splitMediaFromOutput(raw: string): {
  text: string;
  mediaUrls?: string[];
  mediaUrl?: string; // legacy first item for backward compatibility
  audioAsVoice?: boolean; // true if [[audio_as_voice]] tag was found
} {
  // KNOWN: Leading whitespace is semantically meaningful in Markdown (lists, indented fences).
  // We only trim the end; token cleanup below handles removing `MEDIA:` lines.
  const trimmedRaw = raw.trimEnd();
  if (!trimmedRaw.trim()) {
    return { text: "" };
  }

  const media: string[] = [];
  let foundMediaToken = false;

  // Parse fenced code blocks to avoid extracting MEDIA tokens from inside them
  const fenceSpans = parseFenceSpans(trimmedRaw);

  // Collect tokens line by line so we can strip them cleanly.
  const lines = trimmedRaw.split("\n");
  const keptLines: string[] = [];

  let lineOffset = 0; // Character offset of the next line, used for fence checking
  for (const line of lines) {
    // Advance the running offset once per line, whichever branch handles it.
    const lineStart = lineOffset;
    lineOffset += line.length + 1; // +1 for the newline removed by split

    // Lines inside a fenced code block, or not starting with MEDIA:, pass through untouched.
    if (isInsideFence(fenceSpans, lineStart) || !line.trimStart().startsWith("MEDIA:")) {
      keptLines.push(line);
      continue;
    }

    const matches = Array.from(line.matchAll(MEDIA_TOKEN_RE));
    if (matches.length === 0) {
      keptLines.push(line);
      continue;
    }

    const pieces: string[] = [];
    let cursor = 0;

    for (const match of matches) {
      const start = match.index ?? 0;
      pieces.push(line.slice(cursor, start));

      const payload = match[1];
      const unwrapped = unwrapQuoted(payload);
      const payloadValue = unwrapped ?? payload;
      // A quoted payload is a single source; an unquoted one is split on whitespace.
      const parts = unwrapped ? [unwrapped] : payload.split(/\s+/).filter(Boolean);
      const mediaStartIndex = media.length;
      let validCount = 0;
      const invalidParts: string[] = [];
      let hasValidMedia = false;

      for (const part of parts) {
        const candidate = normalizeMediaSource(cleanCandidate(part));
        if (isValidMedia(candidate, unwrapped ? { allowSpaces: true } : undefined)) {
          media.push(candidate);
          hasValidMedia = true;
          foundMediaToken = true;
          validCount += 1;
        } else {
          invalidParts.push(part);
        }
      }

      const trimmedPayload = payloadValue.trim();
      const looksLikeLocalPath =
        isLikelyLocalPath(trimmedPayload) || trimmedPayload.startsWith("file://");

      // An unquoted local path containing spaces splits into one valid fragment plus
      // leftovers; in that case treat the whole payload as a single path instead.
      if (
        !unwrapped &&
        validCount === 1 &&
        invalidParts.length > 0 &&
        /\s/.test(payloadValue) &&
        looksLikeLocalPath
      ) {
        const fallback = normalizeMediaSource(cleanCandidate(payloadValue));
        if (isValidMedia(fallback, { allowSpaces: true })) {
          // Replace the fragment collected for this match with the full path.
          media.splice(mediaStartIndex, media.length - mediaStartIndex, fallback);
          hasValidMedia = true;
          foundMediaToken = true;
          invalidParts.length = 0;
        }
      }

      // Nothing matched piecewise: retry with the whole payload, now also
      // allowing spaces and bare filenames.
      if (!hasValidMedia) {
        const fallback = normalizeMediaSource(cleanCandidate(payloadValue));
        if (isValidMedia(fallback, { allowSpaces: true, allowBareFilename: true })) {
          media.push(fallback);
          hasValidMedia = true;
          foundMediaToken = true;
          invalidParts.length = 0;
        }
      }

      if (hasValidMedia) {
        // Keep any fragments that were not media as visible text.
        if (invalidParts.length > 0) {
          pieces.push(invalidParts.join(" "));
        }
      } else if (looksLikeLocalPath) {
        // Strip MEDIA: lines with local paths even when invalid (e.g. absolute paths
        // from internal tools like TTS). They should never leak as visible text.
        foundMediaToken = true;
      } else {
        // If no valid media was found in this match, keep the original token text.
        pieces.push(match[0]);
      }

      cursor = start + match[0].length;
    }

    pieces.push(line.slice(cursor));

    const cleanedLine = pieces
      .join("")
      .replace(/[ \t]{2,}/g, " ")
      .trim();

    // If the line becomes empty, drop it.
    if (cleanedLine) {
      keptLines.push(cleanedLine);
    }
  }

  let cleanedText = keptLines
    .join("\n")
    .replace(/[ \t]+\n/g, "\n")
    .replace(/[ \t]{2,}/g, " ")
    .replace(/\n{2,}/g, "\n")
    .trim();

  // Detect and strip [[audio_as_voice]] tag
  const audioTagResult = parseAudioTag(cleanedText);
  const hasAudioAsVoice = audioTagResult.audioAsVoice;
  if (audioTagResult.hadTag) {
    cleanedText = audioTagResult.text.replace(/\n{2,}/g, "\n").trim();
  }

  if (media.length === 0) {
    const result: ReturnType<typeof splitMediaFromOutput> = {
      // Return cleaned text if we found a media token OR audio tag, otherwise original
      text: foundMediaToken || hasAudioAsVoice ? cleanedText : trimmedRaw,
    };
    if (hasAudioAsVoice) {
      result.audioAsVoice = true;
    }
    return result;
  }

  return {
    text: cleanedText,
    mediaUrls: media,
    mediaUrl: media[0],
    ...(hasAudioAsVoice ? { audioAsVoice: true } : {}),
  };
}