fix: pass proxy-aware fetchFn to media understanding providers

runProviderEntry now calls resolveProxyFetchFromEnv() and passes the
result as fetchFn to transcribeAudio/describeVideo, so media provider
API calls respect HTTPS_PROXY/HTTP_PROXY behind corporate proxies.
This commit is contained in:
Marcus Castro
2026-02-28 01:41:32 -03:00
committed by Peter Steinberger
parent ba3fa44c5b
commit 8c1e9949b3
2 changed files with 190 additions and 0 deletions

View File

@@ -13,6 +13,7 @@ import type {
MediaUnderstandingModelConfig,
} from "../config/types.tools.js";
import { logVerbose, shouldLogVerbose } from "../globals.js";
import { resolveProxyFetchFromEnv } from "../infra/net/proxy-fetch.js";
import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
import { runExec } from "../process/exec.js";
import { MediaAttachmentCache } from "./attachments.js";
@@ -429,6 +430,10 @@ export async function runProviderEntry(params: {
throw new Error(`Media provider not available: ${providerId}`);
}
// Resolve proxy-aware fetch from env vars (HTTPS_PROXY, HTTP_PROXY, etc.)
// so provider HTTP calls are routed through the proxy when configured.
const fetchFn = resolveProxyFetchFromEnv();
if (capability === "audio") {
if (!provider.transcribeAudio) {
throw new Error(`Audio transcription provider "${providerId}" not available.`);
@@ -468,6 +473,7 @@ export async function runProviderEntry(params: {
prompt,
query: providerQuery,
timeoutMs,
fetchFn,
}),
});
return {
@@ -517,6 +523,7 @@ export async function runProviderEntry(params: {
model: entry.model,
prompt,
timeoutMs,
fetchFn,
}),
});
return {

View File

@@ -0,0 +1,183 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../config/config.js";
import { buildProviderRegistry, runCapability } from "./runner.js";
import { withAudioFixture, withMediaFixture } from "./runner.test-utils.js";
import type { AudioTranscriptionRequest, VideoDescriptionRequest } from "./types.js";
/**
 * Runs `run` against a temporary MP4 media fixture.
 *
 * Thin wrapper over `withMediaFixture` that pins the extension, media type,
 * and a tiny placeholder payload, mirroring the audio helper from
 * runner.test-utils.
 */
async function withVideoFixture(
  filePrefix: string,
  run: (params: {
    ctx: { MediaPath: string; MediaType: string };
    media: ReturnType<typeof import("./runner.js").normalizeMediaAttachments>;
    cache: ReturnType<typeof import("./runner.js").createMediaAttachmentCache>;
  }) => Promise<void>,
) {
  const fixture = {
    filePrefix,
    extension: "mp4",
    mediaType: "video/mp4",
    fileContents: Buffer.from("video"),
  };
  await withMediaFixture(fixture, run);
}
describe("runCapability proxy fetch passthrough", () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });
  afterEach(() => {
    vi.unstubAllEnvs();
  });

  /**
   * Minimal config enabling the audio capability with an openai whisper
   * model. Cast through `unknown` because only the fields read by
   * runCapability are populated.
   */
  const makeAudioCfg = (): OpenClawConfig =>
    ({
      models: {
        providers: {
          openai: {
            apiKey: "test-key",
            models: [],
          },
        },
      },
      tools: {
        media: {
          audio: {
            enabled: true,
            models: [{ provider: "openai", model: "whisper-1" }],
          },
        },
      },
    }) as unknown as OpenClawConfig;

  it("passes fetchFn to audio provider when HTTPS_PROXY is set", async () => {
    vi.stubEnv("HTTPS_PROXY", "http://proxy.test:8080");
    await withAudioFixture("openclaw-audio-proxy", async ({ ctx, media, cache }) => {
      let capturedFetch: typeof fetch | undefined;
      const registry = buildProviderRegistry({
        openai: {
          id: "openai",
          capabilities: ["audio"],
          transcribeAudio: async (req: AudioTranscriptionRequest) => {
            // Capture what the runner handed us so we can assert on it below.
            capturedFetch = req.fetchFn;
            return { text: "transcribed", model: req.model };
          },
        },
      });
      const outcome = await runCapability({
        capability: "audio",
        cfg: makeAudioCfg(),
        ctx,
        attachments: cache,
        media,
        providerRegistry: registry,
      });
      expect(outcome.outputs[0]?.text).toBe("transcribed");
      // With a proxy configured, the runner must supply a non-global fetch.
      expect(capturedFetch).toBeDefined();
      expect(capturedFetch).not.toBe(globalThis.fetch);
    });
  });

  it("passes fetchFn to video provider when HTTPS_PROXY is set", async () => {
    vi.stubEnv("HTTPS_PROXY", "http://proxy.test:8080");
    await withVideoFixture("openclaw-video-proxy", async ({ ctx, media, cache }) => {
      let capturedFetch: typeof fetch | undefined;
      const videoCfg = {
        models: {
          providers: {
            moonshot: {
              apiKey: "test-key",
              models: [],
            },
          },
        },
        tools: {
          media: {
            video: {
              enabled: true,
              models: [{ provider: "moonshot", model: "kimi-k2.5" }],
            },
          },
        },
      } as unknown as OpenClawConfig;
      const outcome = await runCapability({
        capability: "video",
        cfg: videoCfg,
        ctx,
        attachments: cache,
        media,
        // Hand-built registry (rather than buildProviderRegistry) to cover
        // the plain-Map code path as the original test did.
        providerRegistry: new Map([
          [
            "moonshot",
            {
              id: "moonshot",
              capabilities: ["video"],
              describeVideo: async (req: VideoDescriptionRequest) => {
                capturedFetch = req.fetchFn;
                return { text: "video ok", model: req.model };
              },
            },
          ],
        ]),
      });
      expect(outcome.outputs[0]?.text).toBe("video ok");
      expect(capturedFetch).toBeDefined();
      expect(capturedFetch).not.toBe(globalThis.fetch);
    });
  });

  it("does not pass fetchFn when no proxy env vars are set", async () => {
    // Blank out both casings of the proxy vars so a proxied CI host cannot
    // leak into this test.
    // NOTE(review): if resolveProxyFetchFromEnv also honors ALL_PROXY, that
    // variable should be stubbed here too — confirm against its implementation.
    for (const name of ["HTTPS_PROXY", "HTTP_PROXY", "https_proxy", "http_proxy"]) {
      vi.stubEnv(name, "");
    }
    await withAudioFixture("openclaw-audio-no-proxy", async ({ ctx, media, cache }) => {
      let capturedFetch: typeof fetch | undefined;
      const registry = buildProviderRegistry({
        openai: {
          id: "openai",
          capabilities: ["audio"],
          transcribeAudio: async (req: AudioTranscriptionRequest) => {
            capturedFetch = req.fetchFn;
            return { text: "ok", model: req.model };
          },
        },
      });
      const outcome = await runCapability({
        capability: "audio",
        cfg: makeAudioCfg(),
        ctx,
        attachments: cache,
        media,
        providerRegistry: registry,
      });
      expect(outcome.outputs[0]?.text).toBe("ok");
      // No proxy configured → the runner should leave fetchFn unset.
      expect(capturedFetch).toBeUndefined();
    });
  });
});