fix(agents): avoid duplicate same-provider cooldown probes in fallback runs (#41711)
Merged via squash. Prepared head SHA: 8be8967bcb4be81f6abc5ff078644ec4efcfe7a0 Co-authored-by: cgdusek <38732970+cgdusek@users.noreply.github.com> Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com> Reviewed-by: @altaywtf
This commit is contained in:
@@ -59,6 +59,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Mattermost/plugin send actions: normalize direct `replyTo` fallback handling so threaded plugin sends trim blank IDs and reuse the correct reply target again. (#41176) Thanks @hnykda.
|
||||
- MS Teams/allowlist resolution: use the General channel conversation ID as the resolved team key (with Graph GUID fallback) so Bot Framework runtime `channelData.team.id` matching works for team and team/channel allowlist entries. (#41838) Thanks @BradGroux.
|
||||
- Mattermost/Markdown formatting: preserve first-line indentation when stripping bot mentions so nested list items and indented code blocks keep their structure, and render Mattermost tables natively by default instead of fenced-code fallback. (#18655) Thanks @echo931.
|
||||
- Agents/fallback cooldown probing: cap cooldown-bypass probing to one attempt per provider per fallback run so multi-model same-provider cooldown chains can continue to cross-provider fallbacks instead of repeatedly stalling on duplicate cooldown probes. (#41711) Thanks @cgdusek.
|
||||
|
||||
## 2026.3.8
|
||||
|
||||
|
||||
@@ -1318,6 +1318,86 @@ describe("runWithModelFallback", () => {
|
||||
}); // Rate limit allows attempt
|
||||
expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile"); // Cross-provider works
|
||||
});
|
||||
|
||||
it("limits cooldown probes to one per provider before moving to cross-provider fallback", async () => {
|
||||
const { dir } = await makeAuthStoreWithCooldown("anthropic", "rate_limit");
|
||||
const cfg = makeCfg({
|
||||
agents: {
|
||||
defaults: {
|
||||
model: {
|
||||
primary: "anthropic/claude-opus-4-6",
|
||||
fallbacks: [
|
||||
"anthropic/claude-sonnet-4-5",
|
||||
"anthropic/claude-haiku-3-5",
|
||||
"groq/llama-3.3-70b-versatile",
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const run = vi
|
||||
.fn()
|
||||
.mockRejectedValueOnce(new Error("Still rate limited")) // First same-provider probe fails
|
||||
.mockResolvedValueOnce("groq success"); // Next provider succeeds
|
||||
|
||||
const result = await runWithModelFallback({
|
||||
cfg,
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-6",
|
||||
run,
|
||||
agentDir: dir,
|
||||
});
|
||||
|
||||
expect(result.result).toBe("groq success");
|
||||
// Primary is skipped, first same-provider fallback is probed, second same-provider fallback
|
||||
// is skipped (probe already attempted), then cross-provider fallback runs.
|
||||
expect(run).toHaveBeenCalledTimes(2);
|
||||
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
|
||||
allowTransientCooldownProbe: true,
|
||||
});
|
||||
expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile");
|
||||
});
|
||||
|
||||
it("does not consume transient probe slot when first same-provider probe fails with model_not_found", async () => {
|
||||
const { dir } = await makeAuthStoreWithCooldown("anthropic", "rate_limit");
|
||||
const cfg = makeCfg({
|
||||
agents: {
|
||||
defaults: {
|
||||
model: {
|
||||
primary: "anthropic/claude-opus-4-6",
|
||||
fallbacks: [
|
||||
"anthropic/claude-sonnet-4-5",
|
||||
"anthropic/claude-haiku-3-5",
|
||||
"groq/llama-3.3-70b-versatile",
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const run = vi
|
||||
.fn()
|
||||
.mockRejectedValueOnce(new Error("Model not found: anthropic/claude-sonnet-4-5"))
|
||||
.mockResolvedValueOnce("haiku success");
|
||||
|
||||
const result = await runWithModelFallback({
|
||||
cfg,
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-6",
|
||||
run,
|
||||
agentDir: dir,
|
||||
});
|
||||
|
||||
expect(result.result).toBe("haiku success");
|
||||
expect(run).toHaveBeenCalledTimes(2);
|
||||
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
|
||||
allowTransientCooldownProbe: true,
|
||||
});
|
||||
expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-haiku-3-5", {
|
||||
allowTransientCooldownProbe: true,
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -521,6 +521,7 @@ export async function runWithModelFallback<T>(params: {
|
||||
: null;
|
||||
const attempts: FallbackAttempt[] = [];
|
||||
let lastError: unknown;
|
||||
const cooldownProbeUsedProviders = new Set<string>();
|
||||
|
||||
const hasFallbackCandidates = candidates.length > 1;
|
||||
|
||||
@@ -531,6 +532,7 @@ export async function runWithModelFallback<T>(params: {
|
||||
params.provider === candidate.provider && params.model === candidate.model;
|
||||
let runOptions: ModelFallbackRunOptions | undefined;
|
||||
let attemptedDuringCooldown = false;
|
||||
let transientProbeProviderForAttempt: string | null = null;
|
||||
if (authStore) {
|
||||
const profileIds = resolveAuthProfileOrder({
|
||||
cfg: params.cfg,
|
||||
@@ -588,7 +590,41 @@ export async function runWithModelFallback<T>(params: {
|
||||
decision.reason === "overloaded" ||
|
||||
decision.reason === "billing"
|
||||
) {
|
||||
// Probe at most once per provider per fallback run when all profiles
|
||||
// are cooldowned. Re-probing every same-provider candidate can stall
|
||||
// cross-provider fallback on providers with long internal retries.
|
||||
const isTransientCooldownReason =
|
||||
decision.reason === "rate_limit" || decision.reason === "overloaded";
|
||||
if (isTransientCooldownReason && cooldownProbeUsedProviders.has(candidate.provider)) {
|
||||
const error = `Provider ${candidate.provider} is in cooldown (probe already attempted this run)`;
|
||||
attempts.push({
|
||||
provider: candidate.provider,
|
||||
model: candidate.model,
|
||||
error,
|
||||
reason: decision.reason,
|
||||
});
|
||||
logModelFallbackDecision({
|
||||
decision: "skip_candidate",
|
||||
runId: params.runId,
|
||||
requestedProvider: params.provider,
|
||||
requestedModel: params.model,
|
||||
candidate,
|
||||
attempt: i + 1,
|
||||
total: candidates.length,
|
||||
reason: decision.reason,
|
||||
error,
|
||||
nextCandidate: candidates[i + 1],
|
||||
isPrimary,
|
||||
requestedModelMatched: requestedModel,
|
||||
fallbackConfigured: hasFallbackCandidates,
|
||||
profileCount: profileIds.length,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
runOptions = { allowTransientCooldownProbe: true };
|
||||
if (isTransientCooldownReason) {
|
||||
transientProbeProviderForAttempt = candidate.provider;
|
||||
}
|
||||
}
|
||||
attemptedDuringCooldown = true;
|
||||
logModelFallbackDecision({
|
||||
@@ -643,6 +679,18 @@ export async function runWithModelFallback<T>(params: {
|
||||
}
|
||||
const err = attemptRun.error;
|
||||
{
|
||||
if (transientProbeProviderForAttempt) {
|
||||
const probeFailureReason = describeFailoverError(err).reason;
|
||||
const shouldPreserveTransientProbeSlot =
|
||||
probeFailureReason === "model_not_found" ||
|
||||
probeFailureReason === "format" ||
|
||||
probeFailureReason === "auth" ||
|
||||
probeFailureReason === "auth_permanent" ||
|
||||
probeFailureReason === "session_expired";
|
||||
if (!shouldPreserveTransientProbeSlot) {
|
||||
cooldownProbeUsedProviders.add(transientProbeProviderForAttempt);
|
||||
}
|
||||
}
|
||||
// Context overflow errors should be handled by the inner runner's
|
||||
// compaction/retry logic, not by model fallback. If one escapes as a
|
||||
// throw, rethrow it immediately rather than trying a different model
|
||||
|
||||
Reference in New Issue
Block a user