perf: harden chunking against quadratic scans

2026-03-07 16:50:15 +00:00
parent b393b9e8ff
commit 74912037dc
7 changed files with 324 additions and 139 deletions
--- a/extensions/feishu/src/docx-batch-insert.test.ts
+++ b/extensions/feishu/src/docx-batch-insert.test.ts
@@ -0,0 +1,90 @@
+import { describe, expect, it, vi } from "vitest";
+import { BATCH_SIZE, insertBlocksInBatches } from "./docx-batch-insert.js";
+
+function createCountingIterable<T>(values: T[]) {
+  let iterations = 0; // bumped each time a generator over `values` starts executing
+  return {
+    values: {
+      [Symbol.iterator]: function* () {
+        iterations += 1;
+        yield* values;
+      },
+    },
+    getIterations: () => iterations, // lets a test assert how many passes were made
+  };
+}
+
+describe("insertBlocksInBatches", () => {
+  it("builds the source block map once for large flat trees", async () => {
+    const blockCount = BATCH_SIZE + 200;
+    const blocks = Array.from({ length: blockCount }, (_, index) => ({
+      block_id: `block_${index}`,
+      block_type: 2,
+    }));
+    const counting = createCountingIterable(blocks);
+    const createMock = vi.fn(async ({ data }: { data: { children_id: string[] } }) => ({
+      code: 0,
+      data: {
+        children: data.children_id.map((id) => ({ block_id: id })),
+      },
+    }));
+    const client = {
+      docx: {
+        documentBlockDescendant: {
+          create: createMock,
+        },
+      },
+    } as any;
+    // Pass the counting iterable as the block list so each full pass over it is observable.
+    const result = await insertBlocksInBatches(
+      client,
+      "doc_1",
+      counting.values as any[],
+      blocks.map((block) => block.block_id),
+    );
+    // A single pass means the block map was built once rather than once per first-level ID.
+    expect(counting.getIterations()).toBe(1);
+    expect(createMock).toHaveBeenCalledTimes(2);
+    expect(createMock.mock.calls[0]?.[0]?.data.children_id).toHaveLength(BATCH_SIZE);
+    expect(createMock.mock.calls[1]?.[0]?.data.children_id).toHaveLength(200);
+    expect(result.children).toHaveLength(blockCount);
+  });
+
+  it("keeps nested descendants grouped with their root blocks", async () => {
+    const createMock = vi.fn(
+      async ({
+        data,
+      }: {
+        data: { children_id: string[]; descendants: Array<{ block_id: string }> };
+      }) => ({
+        code: 0,
+        data: {
+          children: data.children_id.map((id) => ({ block_id: id })),
+        },
+      }),
+    );
+    const client = {
+      docx: {
+        documentBlockDescendant: {
+          create: createMock,
+        },
+      },
+    } as any;
+    const blocks = [
+      { block_id: "root_a", block_type: 1, children: ["child_a"] },
+      { block_id: "child_a", block_type: 2 },
+      { block_id: "root_b", block_type: 1, children: ["child_b"] },
+      { block_id: "child_b", block_type: 2 },
+    ];
+    // Only the two roots are passed as first-level IDs; each child is listed in a root's `children`.
+    await insertBlocksInBatches(client, "doc_1", blocks as any[], ["root_a", "root_b"]);
+    // Expect one request, with every root immediately followed by its own child in `descendants`.
+    expect(createMock).toHaveBeenCalledTimes(1);
+    expect(createMock.mock.calls[0]?.[0]?.data.children_id).toEqual(["root_a", "root_b"]);
+    expect(
+      createMock.mock.calls[0]?.[0]?.data.descendants.map(
+        (block: { block_id: string }) => block.block_id,
+      ),
+    ).toEqual(["root_a", "child_a", "root_b", "child_b"]);
+  });
+});
--- a/extensions/feishu/src/docx-batch-insert.ts
+++ b/extensions/feishu/src/docx-batch-insert.ts
@@ -14,16 +14,11 @@ export const BATCH_SIZE = 1000; // Feishu API limit per request
 type Logger = { info?: (msg: string) => void };

 /**
- * Collect all descendant blocks for a given set of first-level block IDs.
+ * Collect all descendant blocks for a given first-level block ID.
 * Recursively traverses the block tree to gather all children.
 */
 // eslint-disable-next-line @typescript-eslint/no-explicit-any -- SDK block types
-function collectDescendants(blocks: any[], firstLevelIds: string[]): any[] {
-  const blockMap = new Map<string, any>();
-  for (const block of blocks) {
-    blockMap.set(block.block_id, block);
-  }
-
+function collectDescendants(blockMap: Map<string, any>, rootId: string): any[] {
  const result: any[] = [];
  const visited = new Set<string>();

@@ -47,9 +42,7 @@ function collectDescendants(blocks: any[], firstLevelIds: string[]): any[] {
    }
  }

-  for (const id of firstLevelIds) {
-    collect(id);
-  }
+  collect(rootId);

  return result;
 }
@@ -123,9 +116,13 @@ export async function insertBlocksInBatches(
  const batches: { firstLevelIds: string[]; blocks: any[] }[] = [];
  let currentBatch: { firstLevelIds: string[]; blocks: any[] } = { firstLevelIds: [], blocks: [] };
  const usedBlockIds = new Set<string>();
+  const blockMap = new Map<string, any>();
+  for (const block of blocks) {
+    blockMap.set(block.block_id, block);
+  }

  for (const firstLevelId of firstLevelBlockIds) {
-    const descendants = collectDescendants(blocks, [firstLevelId]);
+    const descendants = collectDescendants(blockMap, firstLevelId);
    const newBlocks = descendants.filter((b) => !usedBlockIds.has(b.block_id));

    // A single block whose subtree exceeds the API limit cannot be split
--- a/src/agents/pi-embedded-block-chunker.test.ts
+++ b/src/agents/pi-embedded-block-chunker.test.ts
@@ -1,4 +1,5 @@
-import { describe, expect, it } from "vitest";
+import { describe, expect, it, vi } from "vitest";
+import * as fences from "../markdown/fences.js";
 import { EmbeddedBlockChunker } from "./pi-embedded-block-chunker.js";

 function createFlushOnParagraphChunker(params: { minChars: number; maxChars: number }) {
@@ -120,4 +121,20 @@ describe("EmbeddedBlockChunker", () => {
    expect(chunks).toEqual(["Intro\n```js\nconst a = 1;\n\nconst b = 2;\n```"]);
    expect(chunker.bufferedText).toBe("After fence");
  });
+
+  it("parses fence spans once per drain call for long fenced buffers", () => {
+    const parseSpy = vi.spyOn(fences, "parseFenceSpans");
+    try {
+      const chunker = new EmbeddedBlockChunker({
+        minChars: 20,
+        maxChars: 80,
+        breakPreference: "paragraph",
+      });
+      chunker.append(`\`\`\`txt\n${"line\n".repeat(600)}\`\`\``);
+      expect(drainChunks(chunker).length).toBeGreaterThan(2);
+      expect(parseSpy).toHaveBeenCalledTimes(1);
+    } finally {
+      parseSpy.mockRestore(); // runs even if an assertion above throws, so the spy cannot leak
+    }
+  });
 });
--- a/src/agents/pi-embedded-block-chunker.ts
+++ b/src/agents/pi-embedded-block-chunker.ts
@@ -12,6 +12,7 @@ export type BlockReplyChunking = {
 type FenceSplit = {
   closeFenceLine: string;
   reopenFenceLine: string;
+  fence: FenceSpan; // the span being split; its openLine is re-emitted in front of the next chunk
 };

 type BreakResult = {
@@ -28,6 +29,7 @@ function findSafeSentenceBreakIndex(
  text: string,
  fenceSpans: FenceSpan[],
  minChars: number,
+  offset = 0,
 ): number {
  const matches = text.matchAll(/[.!?](?=\s|$)/g);
  let sentenceIdx = -1;
@@ -37,7 +39,7 @@ function findSafeSentenceBreakIndex(
      continue;
    }
    const candidate = at + 1;
-    if (isSafeFenceBreak(fenceSpans, candidate)) {
+    if (isSafeFenceBreak(fenceSpans, offset + candidate)) {
      sentenceIdx = candidate;
    }
  }
@@ -49,8 +51,9 @@ function findSafeParagraphBreakIndex(params: {
  fenceSpans: FenceSpan[];
  minChars: number;
  reverse: boolean;
+  offset?: number;
 }): number {
-  const { text, fenceSpans, minChars, reverse } = params;
+  const { text, fenceSpans, minChars, reverse, offset = 0 } = params;
  let paragraphIdx = reverse ? text.lastIndexOf("\n\n") : text.indexOf("\n\n");
  while (reverse ? paragraphIdx >= minChars : paragraphIdx !== -1) {
    const candidates = [paragraphIdx, paragraphIdx + 1];
@@ -61,7 +64,7 @@ function findSafeParagraphBreakIndex(params: {
      if (candidate < 0 || candidate >= text.length) {
        continue;
      }
-      if (isSafeFenceBreak(fenceSpans, candidate)) {
+      if (isSafeFenceBreak(fenceSpans, offset + candidate)) {
        return candidate;
      }
    }
@@ -77,11 +80,12 @@ function findSafeNewlineBreakIndex(params: {
  fenceSpans: FenceSpan[];
  minChars: number;
  reverse: boolean;
+  offset?: number;
 }): number {
-  const { text, fenceSpans, minChars, reverse } = params;
+  const { text, fenceSpans, minChars, reverse, offset = 0 } = params;
  let newlineIdx = reverse ? text.lastIndexOf("\n") : text.indexOf("\n");
  while (reverse ? newlineIdx >= minChars : newlineIdx !== -1) {
-    if (newlineIdx >= minChars && isSafeFenceBreak(fenceSpans, newlineIdx)) {
+    if (newlineIdx >= minChars && isSafeFenceBreak(fenceSpans, offset + newlineIdx)) {
      return newlineIdx;
    }
    newlineIdx = reverse
@@ -125,14 +129,7 @@ export class EmbeddedBlockChunker {
    const minChars = Math.max(1, Math.floor(this.#chunking.minChars));
    const maxChars = Math.max(minChars, Math.floor(this.#chunking.maxChars));

-    // When flushOnParagraph is set (chunkMode="newline"), eagerly split on \n\n
-    // boundaries regardless of minChars so each paragraph is sent immediately.
-    if (this.#chunking.flushOnParagraph && !force) {
-      this.#drainParagraphs(emit, maxChars);
-      return;
-    }
-
-    if (this.#buffer.length < minChars && !force) {
+    if (this.#buffer.length < minChars && !force && !this.#chunking.flushOnParagraph) {
      return;
    }

@@ -144,108 +141,132 @@ export class EmbeddedBlockChunker {
      return;
    }

-    while (this.#buffer.length >= minChars || (force && this.#buffer.length > 0)) {
+    const source = this.#buffer;
+    const fenceSpans = parseFenceSpans(source);
+    let start = 0;
+    let reopenFence: FenceSpan | undefined;
+
+    while (start < source.length) {
+      const reopenPrefix = reopenFence ? `${reopenFence.openLine}\n` : "";
+      const remainingLength = reopenPrefix.length + (source.length - start);
+
+      if (!force && !this.#chunking.flushOnParagraph && remainingLength < minChars) {
+        break;
+      }
+
+      if (this.#chunking.flushOnParagraph && !force) {
+        const paragraphBreak = findNextParagraphBreak(source, fenceSpans, start);
+        const paragraphLimit = Math.max(1, maxChars - reopenPrefix.length);
+        if (paragraphBreak && paragraphBreak.index - start <= paragraphLimit) {
+          const chunk = `${reopenPrefix}${source.slice(start, paragraphBreak.index)}`;
+          if (chunk.trim().length > 0) {
+            emit(chunk);
+          }
+          start = skipLeadingNewlines(source, paragraphBreak.index + paragraphBreak.length);
+          reopenFence = undefined;
+          continue;
+        }
+        if (remainingLength < maxChars) {
+          break;
+        }
+      }
+
+      const view = source.slice(start);
      const breakResult =
-        force && this.#buffer.length <= maxChars
-          ? this.#pickSoftBreakIndex(this.#buffer, 1)
-          : this.#pickBreakIndex(this.#buffer, force ? 1 : undefined);
+        force && remainingLength <= maxChars
+          ? this.#pickSoftBreakIndex(view, fenceSpans, 1, start)
+          : this.#pickBreakIndex(
+              view,
+              fenceSpans,
+              force || this.#chunking.flushOnParagraph ? 1 : undefined,
+              start,
+            );
      if (breakResult.index <= 0) {
        if (force) {
-          emit(this.#buffer);
-          this.#buffer = "";
+          emit(`${reopenPrefix}${source.slice(start)}`);
+          start = source.length;
+          reopenFence = undefined;
        }
-        return;
+        break;
      }

-      if (!this.#emitBreakResult(breakResult, emit)) {
+      const consumed = this.#emitBreakResult({
+        breakResult,
+        emit,
+        reopenPrefix,
+        source,
+        start,
+      });
+      if (consumed === null) {
        continue;
      }
+      start = consumed.start;
+      reopenFence = consumed.reopenFence;

-      if (this.#buffer.length < minChars && !force) {
-        return;
+      const nextLength =
+        (reopenFence ? `${reopenFence.openLine}\n`.length : 0) + (source.length - start);
+      if (nextLength < minChars && !force && !this.#chunking.flushOnParagraph) {
+        break;
      }
-      if (this.#buffer.length < maxChars && !force) {
-        return;
+      if (nextLength < maxChars && !force && !this.#chunking.flushOnParagraph) {
+        break;
      }
    }
+    this.#buffer = reopenFence
+      ? `${reopenFence.openLine}\n${source.slice(start)}`
+      : stripLeadingNewlines(source.slice(start));
  }

-  /** Eagerly emit complete paragraphs (text before \n\n) regardless of minChars. */
-  #drainParagraphs(emit: (chunk: string) => void, maxChars: number) {
-    while (this.#buffer.length > 0) {
-      const fenceSpans = parseFenceSpans(this.#buffer);
-      const paragraphBreak = findNextParagraphBreak(this.#buffer, fenceSpans);
-      if (!paragraphBreak || paragraphBreak.index > maxChars) {
-        // No paragraph boundary yet (or the next boundary is too far). If the
-        // buffer exceeds maxChars, fall back to normal break logic to avoid
-        // oversized chunks or unbounded accumulation.
-        if (this.#buffer.length >= maxChars) {
-          const breakResult = this.#pickBreakIndex(this.#buffer, 1);
-          if (breakResult.index > 0) {
-            this.#emitBreakResult(breakResult, emit);
-            continue;
-          }
-        }
-        return;
-      }
-
-      const chunk = this.#buffer.slice(0, paragraphBreak.index);
-      if (chunk.trim().length > 0) {
-        emit(chunk);
-      }
-      this.#buffer = stripLeadingNewlines(
-        this.#buffer.slice(paragraphBreak.index + paragraphBreak.length),
-      );
-    }
-  }
-
-  #emitBreakResult(breakResult: BreakResult, emit: (chunk: string) => void): boolean {
+  #emitBreakResult(params: {
+    breakResult: BreakResult;
+    emit: (chunk: string) => void;
+    reopenPrefix: string;
+    source: string;
+    start: number;
+  }): { start: number; reopenFence?: FenceSpan } | null {
+    const { breakResult, emit, reopenPrefix, source, start } = params;
     const breakIdx = breakResult.index;
     if (breakIdx <= 0) {
-      return false;
+      return null; // non-positive break index: nothing emitted, position unchanged
     }
 
-    let rawChunk = this.#buffer.slice(0, breakIdx);
+    const absoluteBreakIdx = start + breakIdx; // breakIdx is relative to `start`; make it an index into `source`
+    let rawChunk = `${reopenPrefix}${source.slice(start, absoluteBreakIdx)}`;
     if (rawChunk.trim().length === 0) {
-      this.#buffer = stripLeadingNewlines(this.#buffer.slice(breakIdx)).trimStart();
-      return false;
+      return { start: skipLeadingNewlines(source, absoluteBreakIdx), reopenFence: undefined }; // whitespace-only: emit nothing, just advance
     }
 
-    let nextBuffer = this.#buffer.slice(breakIdx);
     const fenceSplit = breakResult.fenceSplit;
     if (fenceSplit) {
       const closeFence = rawChunk.endsWith("\n")
         ? `${fenceSplit.closeFenceLine}\n`
         : `\n${fenceSplit.closeFenceLine}\n`;
       rawChunk = `${rawChunk}${closeFence}`;
-
-      const reopenFence = fenceSplit.reopenFenceLine.endsWith("\n")
-        ? fenceSplit.reopenFenceLine
-        : `${fenceSplit.reopenFenceLine}\n`;
-      nextBuffer = `${reopenFence}${nextBuffer}`;
     }
 
     emit(rawChunk);
 
     if (fenceSplit) {
-      this.#buffer = nextBuffer;
-    } else {
-      const nextStart =
-        breakIdx < this.#buffer.length && /\s/.test(this.#buffer[breakIdx])
-          ? breakIdx + 1
-          : breakIdx;
-      this.#buffer = stripLeadingNewlines(this.#buffer.slice(nextStart));
+      return { start: absoluteBreakIdx, reopenFence: fenceSplit.fence }; // split inside a fence: hand the span back so the caller can reopen it
     }
 
-    return true;
+    const nextStart =
+      absoluteBreakIdx < source.length && /\s/.test(source[absoluteBreakIdx])
+        ? absoluteBreakIdx + 1
+        : absoluteBreakIdx;
+    return { start: skipLeadingNewlines(source, nextStart), reopenFence: undefined }; // skip the separator and any following "\n" run
   }

-  #pickSoftBreakIndex(buffer: string, minCharsOverride?: number): BreakResult {
+  #pickSoftBreakIndex(
+    buffer: string,
+    fenceSpans: FenceSpan[],
+    minCharsOverride?: number,
+    offset = 0,
+  ): BreakResult {
    const minChars = Math.max(1, Math.floor(minCharsOverride ?? this.#chunking.minChars));
    if (buffer.length < minChars) {
      return { index: -1 };
    }
-    const fenceSpans = parseFenceSpans(buffer);
    const preference = this.#chunking.breakPreference ?? "paragraph";

    if (preference === "paragraph") {
@@ -254,6 +275,7 @@ export class EmbeddedBlockChunker {
        fenceSpans,
        minChars,
        reverse: false,
+        offset,
      });
      if (paragraphIdx !== -1) {
        return { index: paragraphIdx };
@@ -266,6 +288,7 @@ export class EmbeddedBlockChunker {
        fenceSpans,
        minChars,
        reverse: false,
+        offset,
      });
      if (newlineIdx !== -1) {
        return { index: newlineIdx };
@@ -273,7 +296,7 @@ export class EmbeddedBlockChunker {
    }

    if (preference !== "newline") {
-      const sentenceIdx = findSafeSentenceBreakIndex(buffer, fenceSpans, minChars);
+      const sentenceIdx = findSafeSentenceBreakIndex(buffer, fenceSpans, minChars, offset);
      if (sentenceIdx !== -1) {
        return { index: sentenceIdx };
      }
@@ -282,14 +305,18 @@ export class EmbeddedBlockChunker {
    return { index: -1 };
  }

-  #pickBreakIndex(buffer: string, minCharsOverride?: number): BreakResult {
+  #pickBreakIndex(
+    buffer: string,
+    fenceSpans: FenceSpan[],
+    minCharsOverride?: number,
+    offset = 0,
+  ): BreakResult {
    const minChars = Math.max(1, Math.floor(minCharsOverride ?? this.#chunking.minChars));
    const maxChars = Math.max(minChars, Math.floor(this.#chunking.maxChars));
    if (buffer.length < minChars) {
      return { index: -1 };
    }
    const window = buffer.slice(0, Math.min(maxChars, buffer.length));
-    const fenceSpans = parseFenceSpans(buffer);

    const preference = this.#chunking.breakPreference ?? "paragraph";
    if (preference === "paragraph") {
@@ -298,6 +325,7 @@ export class EmbeddedBlockChunker {
        fenceSpans,
        minChars,
        reverse: true,
+        offset,
      });
      if (paragraphIdx !== -1) {
        return { index: paragraphIdx };
@@ -310,6 +338,7 @@ export class EmbeddedBlockChunker {
        fenceSpans,
        minChars,
        reverse: true,
+        offset,
      });
      if (newlineIdx !== -1) {
        return { index: newlineIdx };
@@ -317,7 +346,7 @@ export class EmbeddedBlockChunker {
    }

    if (preference !== "newline") {
-      const sentenceIdx = findSafeSentenceBreakIndex(window, fenceSpans, minChars);
+      const sentenceIdx = findSafeSentenceBreakIndex(window, fenceSpans, minChars, offset);
      if (sentenceIdx !== -1) {
        return { index: sentenceIdx };
      }
@@ -328,22 +357,23 @@ export class EmbeddedBlockChunker {
    }

    for (let i = window.length - 1; i >= minChars; i--) {
-      if (/\s/.test(window[i]) && isSafeFenceBreak(fenceSpans, i)) {
+      if (/\s/.test(window[i]) && isSafeFenceBreak(fenceSpans, offset + i)) {
        return { index: i };
      }
    }

    if (buffer.length >= maxChars) {
-      if (isSafeFenceBreak(fenceSpans, maxChars)) {
+      if (isSafeFenceBreak(fenceSpans, offset + maxChars)) {
        return { index: maxChars };
      }
-      const fence = findFenceSpanAt(fenceSpans, maxChars);
+      const fence = findFenceSpanAt(fenceSpans, offset + maxChars);
      if (fence) {
        return {
          index: maxChars,
          fenceSplit: {
            closeFenceLine: `${fence.indent}${fence.marker}`,
            reopenFenceLine: fence.openLine,
+            fence,
          },
        };
      }
@@ -354,12 +384,17 @@ export class EmbeddedBlockChunker {
  }
 }

-function stripLeadingNewlines(value: string): string {
-  let i = 0;
+function skipLeadingNewlines(value: string, start = 0): number {
+  let i = start;
   while (i < value.length && value[i] === "\n") {
     i++;
   }
-  return i > 0 ? value.slice(i) : value;
+  return i; // first index at or after `start` that is not "\n" (value.length if there is none)
+}
+// Returns `value` without its leading "\n" run; the same string comes back when there is none.
+function stripLeadingNewlines(value: string): string {
+  const start = skipLeadingNewlines(value);
+  return start > 0 ? value.slice(start) : value;
 }

 function findNextParagraphBreak(
--- a/src/auto-reply/chunk.test.ts
+++ b/src/auto-reply/chunk.test.ts
@@ -1,4 +1,5 @@
-import { describe, expect, it } from "vitest";
+import { describe, expect, it, vi } from "vitest";
+import * as fences from "../markdown/fences.js";
 import { hasBalancedFences } from "../test-utils/chunk-test-helpers.js";
 import {
  chunkByNewline,
@@ -217,6 +218,17 @@ describe("chunkMarkdownText", () => {
    expect(chunks[0]?.length).toBe(20);
    expect(chunks.join("")).toBe(text);
  });
+
+  it("parses fence spans once for long fenced payloads", () => {
+    const parseSpy = vi.spyOn(fences, "parseFenceSpans");
+    try {
+      const text = `\`\`\`txt\n${"line\n".repeat(600)}\`\`\``;
+      expect(chunkMarkdownText(text, 80).length).toBeGreaterThan(2);
+      expect(parseSpy).toHaveBeenCalledTimes(1);
+    } finally {
+      parseSpy.mockRestore(); // runs even if an assertion above throws, so the spy cannot leak
+    }
+  });
 });

 describe("chunkByNewline", () => {
--- a/src/auto-reply/chunk.ts
+++ b/src/auto-reply/chunk.ts
@@ -306,7 +306,7 @@ export function chunkText(text: string, limit: number): string[] {
  }
  return chunkTextByBreakResolver(text, limit, (window) => {
    // 1) Prefer a newline break inside the window (outside parentheses).
-    const { lastNewline, lastWhitespace } = scanParenAwareBreakpoints(window);
+    const { lastNewline, lastWhitespace } = scanParenAwareBreakpoints(window, 0, window.length);
    // 2) Otherwise prefer the last whitespace (word boundary) inside the window.
    return lastNewline > 0 ? lastNewline : lastWhitespace;
  });
@@ -319,14 +319,24 @@ export function chunkMarkdownText(text: string, limit: number): string[] {
  }

  const chunks: string[] = [];
-  let remaining = text;
+  const spans = parseFenceSpans(text);
+  let start = 0;
+  let reopenFence: ReturnType<typeof findFenceSpanAt> | undefined;

-  while (remaining.length > limit) {
-    const spans = parseFenceSpans(remaining);
-    const window = remaining.slice(0, limit);
+  while (start < text.length) {
+    const reopenPrefix = reopenFence ? `${reopenFence.openLine}\n` : "";
+    const contentLimit = Math.max(1, limit - reopenPrefix.length);
+    if (text.length - start <= contentLimit) {
+      const finalChunk = `${reopenPrefix}${text.slice(start)}`;
+      if (finalChunk.length > 0) {
+        chunks.push(finalChunk);
+      }
+      break;
+    }

-    const softBreak = pickSafeBreakIndex(window, spans);
-    let breakIdx = softBreak > 0 ? softBreak : limit;
+    const windowEnd = Math.min(text.length, start + contentLimit);
+    const softBreak = pickSafeBreakIndex(text, start, windowEnd, spans);
+    let breakIdx = softBreak > start ? softBreak : windowEnd;

    const initialFence = isSafeFenceBreak(spans, breakIdx)
      ? undefined
@@ -335,38 +345,38 @@ export function chunkMarkdownText(text: string, limit: number): string[] {
    let fenceToSplit = initialFence;
    if (initialFence) {
      const closeLine = `${initialFence.indent}${initialFence.marker}`;
-      const maxIdxIfNeedNewline = limit - (closeLine.length + 1);
+      const maxIdxIfNeedNewline = start + (contentLimit - (closeLine.length + 1));

-      if (maxIdxIfNeedNewline <= 0) {
+      if (maxIdxIfNeedNewline <= start) {
        fenceToSplit = undefined;
-        breakIdx = limit;
+        breakIdx = windowEnd;
      } else {
        const minProgressIdx = Math.min(
-          remaining.length,
-          initialFence.start + initialFence.openLine.length + 2,
+          text.length,
+          Math.max(start + 1, initialFence.start + initialFence.openLine.length + 2),
        );
-        const maxIdxIfAlreadyNewline = limit - closeLine.length;
+        const maxIdxIfAlreadyNewline = start + (contentLimit - closeLine.length);

        let pickedNewline = false;
-        let lastNewline = remaining.lastIndexOf("\n", Math.max(0, maxIdxIfAlreadyNewline - 1));
-        while (lastNewline !== -1) {
+        let lastNewline = text.lastIndexOf("\n", Math.max(start, maxIdxIfAlreadyNewline - 1));
+        while (lastNewline >= start) {
          const candidateBreak = lastNewline + 1;
          if (candidateBreak < minProgressIdx) {
            break;
          }
          const candidateFence = findFenceSpanAt(spans, candidateBreak);
          if (candidateFence && candidateFence.start === initialFence.start) {
-            breakIdx = Math.max(1, candidateBreak);
+            breakIdx = candidateBreak;
            pickedNewline = true;
            break;
          }
-          lastNewline = remaining.lastIndexOf("\n", lastNewline - 1);
+          lastNewline = text.lastIndexOf("\n", lastNewline - 1);
        }

        if (!pickedNewline) {
          if (minProgressIdx > maxIdxIfAlreadyNewline) {
            fenceToSplit = undefined;
-            breakIdx = limit;
+            breakIdx = windowEnd;
          } else {
            breakIdx = Math.max(minProgressIdx, maxIdxIfNeedNewline);
          }
@@ -378,68 +388,72 @@ export function chunkMarkdownText(text: string, limit: number): string[] {
        fenceAtBreak && fenceAtBreak.start === initialFence.start ? fenceAtBreak : undefined;
    }

-    let rawChunk = remaining.slice(0, breakIdx);
-    if (!rawChunk) {
+    const rawContent = text.slice(start, breakIdx);
+    if (!rawContent) {
      break;
    }

-    const brokeOnSeparator = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]);
-    const nextStart = Math.min(remaining.length, breakIdx + (brokeOnSeparator ? 1 : 0));
-    let next = remaining.slice(nextStart);
+    let rawChunk = `${reopenPrefix}${rawContent}`;
+    const brokeOnSeparator = breakIdx < text.length && /\s/.test(text[breakIdx]);
+    let nextStart = Math.min(text.length, breakIdx + (brokeOnSeparator ? 1 : 0));

    if (fenceToSplit) {
      const closeLine = `${fenceToSplit.indent}${fenceToSplit.marker}`;
      rawChunk = rawChunk.endsWith("\n") ? `${rawChunk}${closeLine}` : `${rawChunk}\n${closeLine}`;
-      next = `${fenceToSplit.openLine}\n${next}`;
+      reopenFence = fenceToSplit;
    } else {
-      next = stripLeadingNewlines(next);
+      nextStart = skipLeadingNewlines(text, nextStart);
+      reopenFence = undefined;
    }

    chunks.push(rawChunk);
-    remaining = next;
-  }
-
-  if (remaining.length) {
-    chunks.push(remaining);
+    start = nextStart;
  }
  return chunks;
 }

-function stripLeadingNewlines(value: string): string {
-  let i = 0;
+function skipLeadingNewlines(value: string, start = 0): number {
+  let i = start;
   while (i < value.length && value[i] === "\n") {
     i++;
   }
-  return i > 0 ? value.slice(i) : value;
+  return i; // first index at or after `start` that is not "\n" (value.length if there is none)
 }

-function pickSafeBreakIndex(window: string, spans: ReturnType<typeof parseFenceSpans>): number {
-  const { lastNewline, lastWhitespace } = scanParenAwareBreakpoints(window, (index) =>
+function pickSafeBreakIndex(
+  text: string,
+  start: number, // scanning covers indices start..end-1 of `text`
+  end: number,
+  spans: ReturnType<typeof parseFenceSpans>,
+): number {
+  const { lastNewline, lastWhitespace } = scanParenAwareBreakpoints(text, start, end, (index) =>
     isSafeFenceBreak(spans, index),
   );
 
-  if (lastNewline > 0) {
+  if (lastNewline > start) { // a hit exactly at `start` is rejected: it would yield an empty chunk
     return lastNewline;
   }
-  if (lastWhitespace > 0) {
+  if (lastWhitespace > start) { // fall back to the last whitespace hit when no newline qualified
     return lastWhitespace;
   }
   return -1;
 }

 function scanParenAwareBreakpoints(
-  window: string,
+  text: string,
+  start: number,
+  end: number,
  isAllowed: (index: number) => boolean = () => true,
 ): { lastNewline: number; lastWhitespace: number } {
  let lastNewline = -1;
  let lastWhitespace = -1;
  let depth = 0;

-  for (let i = 0; i < window.length; i++) {
+  for (let i = start; i < end; i++) {
    if (!isAllowed(i)) {
      continue;
    }
-    const char = window[i];
+    const char = text[i];
    if (char === "(") {
      depth += 1;
      continue;
--- a/src/markdown/fences.ts
+++ b/src/markdown/fences.ts
@@ -73,7 +73,27 @@ export function parseFenceSpans(buffer: string): FenceSpan[] {
 }

 export function findFenceSpanAt(spans: FenceSpan[], index: number): FenceSpan | undefined {
-  return spans.find((span) => index > span.start && index < span.end);
+  let low = 0;
+  let high = spans.length - 1;
+  // Binary search: needs `spans` sorted and non-overlapping — confirm parseFenceSpans guarantees it.
+  while (low <= high) {
+    const mid = Math.floor((low + high) / 2);
+    const span = spans[mid];
+    if (!span) {
+      break;
+    }
+    if (index <= span.start) {
+      high = mid - 1;
+      continue;
+    }
+    if (index >= span.end) {
+      low = mid + 1;
+      continue;
+    }
+    return span;
+  }
+  // No span has `index` strictly inside it; a span's own start/end offsets do not count.
+  return undefined;
 }

 export function isSafeFenceBreak(spans: FenceSpan[], index: number): boolean {