diff --git a/CHANGELOG.md b/CHANGELOG.md
index 93483d148..f1724d98c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -34,6 +34,7 @@ Docs: https://docs.openclaw.ai
 - ACP/regressions: add gateway RPC coverage for ACP lineage patching, ACPX runtime coverage for image prompt serialization, and an operator smoke-test procedure for live ACP spawn verification. (#41456) Thanks @mbelinky.
 - Agents/billing recovery: probe single-provider billing cooldowns on the existing throttle so topping up credits can recover without a manual gateway restart. (#41422) thanks @altaywtf.
 - ACP/follow-up hardening: make session restore and prompt completion degrade gracefully on transcript/update failures, enforce bounded tool-location traversal, and skip non-image ACPX turns the runtime cannot serialize. (#41464) Thanks @mbelinky.
+- Agents/fallback observability: add structured, sanitized model-fallback decision and auth-profile failure-state events with correlated run IDs so cooldown probes and failover paths are easier to trace in logs. (#41337) thanks @altaywtf.
 
 ## 2026.3.8
 
diff --git a/src/agents/auth-profiles/state-observation.test.ts b/src/agents/auth-profiles/state-observation.test.ts
new file mode 100644
index 000000000..05f2abfff
--- /dev/null
+++ b/src/agents/auth-profiles/state-observation.test.ts
@@ -0,0 +1,38 @@
+import { afterEach, describe, expect, it, vi } from "vitest";
+import { resetLogger, setLoggerOverride } from "../../logging/logger.js";
+import { logAuthProfileFailureStateChange } from "./state-observation.js";
+
+afterEach(() => {
+  setLoggerOverride(null);
+  resetLogger();
+});
+
+describe("logAuthProfileFailureStateChange", () => {
+  it("sanitizes consoleMessage fields before logging", () => {
+    const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
+    setLoggerOverride({ level: "silent", consoleLevel: "warn" });
+
+    logAuthProfileFailureStateChange({
+      runId: "run-1\nforged\tentry\rtest",
+      profileId: "openai:profile-1",
+      provider: "openai\u001b]8;;https://evil.test\u0007",
+      reason: "overloaded",
+      previous: undefined,
+      next: {
+        errorCount: 1,
+        cooldownUntil: 1_700_000_060_000,
+        failureCounts: { overloaded: 1 },
+      },
+      now: 1_700_000_000_000,
+    });
+
+    const consoleLine = warnSpy.mock.calls[0]?.[0];
+    expect(typeof consoleLine).toBe("string");
+    expect(consoleLine).toContain("runId=run-1 forged entry test");
+    expect(consoleLine).toContain("provider=openai]8;;https://evil.test");
+    expect(consoleLine).not.toContain("\n");
+    expect(consoleLine).not.toContain("\r");
+    expect(consoleLine).not.toContain("\t");
+    expect(consoleLine).not.toContain("\u001b");
+  });
+});
diff --git a/src/agents/auth-profiles/state-observation.ts b/src/agents/auth-profiles/state-observation.ts
new file mode 100644
index 000000000..633bdc003
--- /dev/null
+++ b/src/agents/auth-profiles/state-observation.ts
@@ -0,0 +1,59 @@
+import { redactIdentifier } from "../../logging/redact-identifier.js";
+import { createSubsystemLogger } from "../../logging/subsystem.js";
+import { sanitizeForConsole } from "../pi-embedded-error-observation.js";
+import type { AuthProfileFailureReason, ProfileUsageStats } from "./types.js";
+
+const observationLog = createSubsystemLogger("agent/embedded");
+
+export function logAuthProfileFailureStateChange(params: {
+  runId?: string;
+  profileId: string;
+  provider: string;
+  reason: AuthProfileFailureReason;
+  previous: ProfileUsageStats | undefined;
+  next: ProfileUsageStats;
+  now: number;
+}): void {
+  const windowType =
+    params.reason === "billing" || params.reason === "auth_permanent" ? "disabled" : "cooldown";
+  const previousCooldownUntil = params.previous?.cooldownUntil;
+  const previousDisabledUntil = params.previous?.disabledUntil;
+  // Active cooldown/disable windows are intentionally immutable; log whether this
+  // update reused the existing window instead of extending it.
+  const windowReused =
+    windowType === "disabled"
+      ? typeof previousDisabledUntil === "number" &&
+        Number.isFinite(previousDisabledUntil) &&
+        previousDisabledUntil > params.now &&
+        previousDisabledUntil === params.next.disabledUntil
+      : typeof previousCooldownUntil === "number" &&
+        Number.isFinite(previousCooldownUntil) &&
+        previousCooldownUntil > params.now &&
+        previousCooldownUntil === params.next.cooldownUntil;
+  const safeProfileId = redactIdentifier(params.profileId, { len: 12 });
+  const safeRunId = sanitizeForConsole(params.runId) ?? "-";
+  const safeProvider = sanitizeForConsole(params.provider) ?? "-";
+
+  observationLog.warn("auth profile failure state updated", {
+    event: "auth_profile_failure_state_updated",
+    tags: ["error_handling", "auth_profiles", windowType],
+    runId: params.runId,
+    profileId: safeProfileId,
+    provider: params.provider,
+    reason: params.reason,
+    windowType,
+    windowReused,
+    previousErrorCount: params.previous?.errorCount,
+    errorCount: params.next.errorCount,
+    previousCooldownUntil,
+    cooldownUntil: params.next.cooldownUntil,
+    previousDisabledUntil,
+    disabledUntil: params.next.disabledUntil,
+    previousDisabledReason: params.previous?.disabledReason,
+    disabledReason: params.next.disabledReason,
+    failureCounts: params.next.failureCounts,
+    consoleMessage:
+      `auth profile failure state updated: runId=${safeRunId} profile=${safeProfileId} provider=${safeProvider} ` +
+      `reason=${params.reason} window=${windowType} reused=${String(windowReused)}`,
+  });
+}
diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts
index 0d9ae6a6a..273fd7545 100644
--- a/src/agents/auth-profiles/usage.ts
+++ b/src/agents/auth-profiles/usage.ts
@@ -1,5 +1,6 @@
 import type { OpenClawConfig } from "../../config/config.js";
 import { normalizeProviderId } from "../model-selection.js";
+import { logAuthProfileFailureStateChange } from "./state-observation.js";
 import { saveAuthProfileStore, updateAuthProfileStoreWithLock } from "./store.js";
 import type { AuthProfileFailureReason, AuthProfileStore, ProfileUsageStats } from "./types.js";
 
@@ -462,12 +463,16 @@ export async function markAuthProfileFailure(params: {
   reason: AuthProfileFailureReason;
   cfg?: OpenClawConfig;
   agentDir?: string;
+  runId?: string;
 }): Promise<void> {
-  const { store, profileId, reason, agentDir, cfg } = params;
+  const { store, profileId, reason, agentDir, cfg, runId } = params;
   const profile = store.profiles[profileId];
   if (!profile || isAuthCooldownBypassedForProvider(profile.provider)) {
     return;
   }
+  let nextStats: ProfileUsageStats | undefined;
+  let previousStats: ProfileUsageStats | undefined;
+  let updateTime = 0;
   const updated = await updateAuthProfileStoreWithLock({
     agentDir,
     updater: (freshStore) => {
@@ -482,19 +487,32 @@ export async function markAuthProfileFailure(params: {
         providerId: providerKey,
       });
 
-      updateUsageStatsEntry(freshStore, profileId, (existing) =>
-        computeNextProfileUsageStats({
-          existing: existing ?? {},
-          now,
-          reason,
-          cfgResolved,
-        }),
-      );
+      previousStats = freshStore.usageStats?.[profileId];
+      updateTime = now;
+      const computed = computeNextProfileUsageStats({
+        existing: previousStats ?? {},
+        now,
+        reason,
+        cfgResolved,
+      });
+      nextStats = computed;
+      updateUsageStatsEntry(freshStore, profileId, () => computed);
       return true;
     },
   });
   if (updated) {
     store.usageStats = updated.usageStats;
+    if (nextStats) {
+      logAuthProfileFailureStateChange({
+        runId,
+        profileId,
+        provider: profile.provider,
+        reason,
+        previous: previousStats,
+        next: nextStats,
+        now: updateTime,
+      });
+    }
     return;
   }
   if (!store.profiles[profileId]) {
@@ -508,15 +526,25 @@ export async function markAuthProfileFailure(params: {
     providerId: providerKey,
   });
 
-  updateUsageStatsEntry(store, profileId, (existing) =>
-    computeNextProfileUsageStats({
-      existing: existing ?? {},
-      now,
-      reason,
-      cfgResolved,
-    }),
-  );
+  previousStats = store.usageStats?.[profileId];
+  const computed = computeNextProfileUsageStats({
+    existing: previousStats ?? {},
+    now,
+    reason,
+    cfgResolved,
+  });
+  nextStats = computed;
+  updateUsageStatsEntry(store, profileId, () => computed);
   saveAuthProfileStore(store, agentDir);
+  logAuthProfileFailureStateChange({
+    runId,
+    profileId,
+    provider: store.profiles[profileId]?.provider ?? profile.provider,
+    reason,
+    previous: previousStats,
+    next: nextStats,
+    now,
+  });
 }
 
 /**
@@ -528,12 +556,14 @@ export async function markAuthProfileCooldown(params: {
   store: AuthProfileStore;
   profileId: string;
   agentDir?: string;
+  runId?: string;
 }): Promise<void> {
   await markAuthProfileFailure({
     store: params.store,
     profileId: params.profileId,
     reason: "unknown",
     agentDir: params.agentDir,
+    runId: params.runId,
   });
 }
 
diff --git a/src/agents/model-fallback-observation.ts b/src/agents/model-fallback-observation.ts
new file mode 100644
index 000000000..450e047c7
--- /dev/null
+++ b/src/agents/model-fallback-observation.ts
@@ -0,0 +1,93 @@
+import { createSubsystemLogger } from "../logging/subsystem.js";
+import { sanitizeForLog } from "../terminal/ansi.js";
+import type { FallbackAttempt, ModelCandidate } from "./model-fallback.types.js";
+import { buildTextObservationFields } from "./pi-embedded-error-observation.js";
+import type { FailoverReason } from "./pi-embedded-helpers.js";
+
+const decisionLog = createSubsystemLogger("model-fallback").child("decision");
+
+function buildErrorObservationFields(error?: string): {
+  errorPreview?: string;
+  errorHash?: string;
+  errorFingerprint?: string;
+  httpCode?: string;
+  providerErrorType?: string;
+  providerErrorMessagePreview?: string;
+  requestIdHash?: string;
+} {
+  const observed = buildTextObservationFields(error);
+  return {
+    errorPreview: observed.textPreview,
+    errorHash: observed.textHash,
+    errorFingerprint: observed.textFingerprint,
+    httpCode: observed.httpCode,
+    providerErrorType: observed.providerErrorType,
+    providerErrorMessagePreview: observed.providerErrorMessagePreview,
+    requestIdHash: observed.requestIdHash,
+  };
+}
+
+export function logModelFallbackDecision(params: {
+  decision:
+    | "skip_candidate"
+    | "probe_cooldown_candidate"
+    | "candidate_failed"
+    | "candidate_succeeded";
+  runId?: string;
+  requestedProvider: string;
+  requestedModel: string;
+  candidate: ModelCandidate;
+  attempt?: number;
+  total?: number;
+  reason?: FailoverReason | null;
+  status?: number;
+  code?: string;
+  error?: string;
+  nextCandidate?: ModelCandidate;
+  isPrimary?: boolean;
+  requestedModelMatched?: boolean;
+  fallbackConfigured?: boolean;
+  allowTransientCooldownProbe?: boolean;
+  profileCount?: number;
+  previousAttempts?: FallbackAttempt[];
+}): void {
+  const nextText = params.nextCandidate
+    ? `${sanitizeForLog(params.nextCandidate.provider)}/${sanitizeForLog(params.nextCandidate.model)}`
+    : "none";
+  const reasonText = params.reason ?? "unknown";
+  const observedError = buildErrorObservationFields(params.error);
+  decisionLog.warn("model fallback decision", {
+    event: "model_fallback_decision",
+    tags: ["error_handling", "model_fallback", params.decision],
+    runId: params.runId,
+    decision: params.decision,
+    requestedProvider: params.requestedProvider,
+    requestedModel: params.requestedModel,
+    candidateProvider: params.candidate.provider,
+    candidateModel: params.candidate.model,
+    attempt: params.attempt,
+    total: params.total,
+    reason: params.reason,
+    status: params.status,
+    code: params.code,
+    ...observedError,
+    nextCandidateProvider: params.nextCandidate?.provider,
+    nextCandidateModel: params.nextCandidate?.model,
+    isPrimary: params.isPrimary,
+    requestedModelMatched: params.requestedModelMatched,
+    fallbackConfigured: params.fallbackConfigured,
+    allowTransientCooldownProbe: params.allowTransientCooldownProbe,
+    profileCount: params.profileCount,
+    previousAttempts: params.previousAttempts?.map((attempt) => ({
+      provider: attempt.provider,
+      model: attempt.model,
+      reason: attempt.reason,
+      status: attempt.status,
+      code: attempt.code,
+      ...buildErrorObservationFields(attempt.error),
+    })),
+    consoleMessage:
+      `model fallback decision: decision=${params.decision} requested=${sanitizeForLog(params.requestedProvider)}/${sanitizeForLog(params.requestedModel)} ` +
+      `candidate=${sanitizeForLog(params.candidate.provider)}/${sanitizeForLog(params.candidate.model)} reason=${reasonText} next=${nextText}`,
+  });
+}
diff --git a/src/agents/model-fallback.probe.test.ts b/src/agents/model-fallback.probe.test.ts
index 9426eba6a..d08bd0d4b 100644
--- a/src/agents/model-fallback.probe.test.ts
+++ b/src/agents/model-fallback.probe.test.ts
@@ -1,5 +1,8 @@
+import os from "node:os";
+import path from "node:path";
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 import type { OpenClawConfig } from "../config/config.js";
+import { registerLogTransport, resetLogger, setLoggerOverride } from "../logging/logger.js";
 import type { AuthProfileStore } from "./auth-profiles.js";
 import { makeModelFallbackCfg } from "./test-helpers/model-fallback-config-fixture.js";
 
@@ -28,6 +31,7 @@ const mockedResolveProfilesUnavailableReason = vi.mocked(resolveProfilesUnavaila
 const mockedResolveAuthProfileOrder = vi.mocked(resolveAuthProfileOrder);
 
 const makeCfg = makeModelFallbackCfg;
+let unregisterLogTransport: (() => void) | undefined;
 
 function expectFallbackUsed(
   result: { result: unknown; attempts: Array<{ reason?: string }> },
@@ -149,6 +153,10 @@ describe("runWithModelFallback – probe logic", () => {
 
   afterEach(() => {
     Date.now = realDateNow;
+    unregisterLogTransport?.();
+    unregisterLogTransport = undefined;
+    setLoggerOverride(null);
+    resetLogger();
     vi.restoreAllMocks();
   });
 
@@ -194,6 +202,99 @@ describe("runWithModelFallback – probe logic", () => {
     expectPrimaryProbeSuccess(result, run, "probed-ok");
   });
 
+  it("logs primary metadata on probe success and failure fallback decisions", async () => {
+    const cfg = makeCfg();
+    const records: Array<Record<string, unknown>> = [];
+    mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + 60 * 1000);
+    setLoggerOverride({
+      level: "trace",
+      consoleLevel: "silent",
+      file: path.join(os.tmpdir(), `openclaw-model-fallback-probe-${Date.now()}.log`),
+    });
+    unregisterLogTransport = registerLogTransport((record) => {
+      records.push(record);
+    });
+
+    const run = vi.fn().mockResolvedValue("probed-ok");
+
+    const result = await runPrimaryCandidate(cfg, run);
+
+    expectPrimaryProbeSuccess(result, run, "probed-ok");
+
+    _probeThrottleInternals.lastProbeAttempt.clear();
+
+    const fallbackCfg = makeCfg({
+      agents: {
+        defaults: {
+          model: {
+            primary: "openai/gpt-4.1-mini",
+            fallbacks: ["anthropic/claude-haiku-3-5", "google/gemini-2-flash"],
+          },
+        },
+      },
+    } as Partial<OpenClawConfig>);
+    mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + 60 * 1000);
+    const fallbackRun = vi
+      .fn()
+      .mockRejectedValueOnce(Object.assign(new Error("rate limited"), { status: 429 }))
+      .mockResolvedValueOnce("fallback-ok");
+
+    const fallbackResult = await runPrimaryCandidate(fallbackCfg, fallbackRun);
+
+    expect(fallbackResult.result).toBe("fallback-ok");
+    expect(fallbackRun).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini", {
+      allowTransientCooldownProbe: true,
+    });
+    expect(fallbackRun).toHaveBeenNthCalledWith(2, "anthropic", "claude-haiku-3-5");
+
+    const decisionPayloads = records
+      .filter(
+        (record) =>
+          record["2"] === "model fallback decision" &&
+          record["1"] &&
+          typeof record["1"] === "object",
+      )
+      .map((record) => record["1"] as Record<string, unknown>);
+
+    expect(decisionPayloads).toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({
+          event: "model_fallback_decision",
+          decision: "probe_cooldown_candidate",
+          candidateProvider: "openai",
+          candidateModel: "gpt-4.1-mini",
+          allowTransientCooldownProbe: true,
+        }),
+        expect.objectContaining({
+          event: "model_fallback_decision",
+          decision: "candidate_succeeded",
+          candidateProvider: "openai",
+          candidateModel: "gpt-4.1-mini",
+          isPrimary: true,
+          requestedModelMatched: true,
+        }),
+        expect.objectContaining({
+          event: "model_fallback_decision",
+          decision: "candidate_failed",
+          candidateProvider: "openai",
+          candidateModel: "gpt-4.1-mini",
+          isPrimary: true,
+          requestedModelMatched: true,
+          nextCandidateProvider: "anthropic",
+          nextCandidateModel: "claude-haiku-3-5",
+        }),
+        expect.objectContaining({
+          event: "model_fallback_decision",
+          decision: "candidate_succeeded",
+          candidateProvider: "anthropic",
+          candidateModel: "claude-haiku-3-5",
+          isPrimary: false,
+          requestedModelMatched: false,
+        }),
+      ]),
+    );
+  });
+
   it("probes primary model when cooldown already expired", async () => {
     const cfg = makeCfg();
     // Cooldown expired 5 min ago
diff --git a/src/agents/model-fallback.run-embedded.e2e.test.ts b/src/agents/model-fallback.run-embedded.e2e.test.ts
index 2e5a8202e..504b14571 100644
--- a/src/agents/model-fallback.run-embedded.e2e.test.ts
+++ b/src/agents/model-fallback.run-embedded.e2e.test.ts
@@ -207,6 +207,7 @@ async function runEmbeddedFallback(params: {
     cfg,
     provider: "openai",
     model: "mock-1",
+    runId: params.runId,
     agentDir: params.agentDir,
     run: (provider, model, options) =>
       runEmbeddedPiAgent({
diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts
index c99d0a9be..e4c84028e 100644
--- a/src/agents/model-fallback.test.ts
+++ b/src/agents/model-fallback.test.ts
@@ -536,7 +536,9 @@ describe("runWithModelFallback", () => {
       });
 
       expect(result.result).toBe("ok");
-      const warning = warnSpy.mock.calls[0]?.[0] as string;
+      const warning = warnSpy.mock.calls
+        .map((call) => call[0] as string)
+        .find((value) => value.includes('Model "openai/gpt-6spoof" not found'));
       expect(warning).toContain('Model "openai/gpt-6spoof" not found');
       expect(warning).not.toContain("\u001B");
       expect(warning).not.toContain("\n");
diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts
index b9ff9d668..373e10c93 100644
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -19,6 +19,8 @@ import {
   isFailoverError,
   isTimeoutError,
 } from "./failover-error.js";
+import { logModelFallbackDecision } from "./model-fallback-observation.js";
+import type { FallbackAttempt, ModelCandidate } from "./model-fallback.types.js";
 import {
   buildConfiguredAllowlistKeys,
   buildModelAliasIndex,
@@ -32,11 +34,6 @@ import { isLikelyContextOverflowError } from "./pi-embedded-helpers.js";
 
 const log = createSubsystemLogger("model-fallback");
 
-type ModelCandidate = {
-  provider: string;
-  model: string;
-};
-
 export type ModelFallbackRunOptions = {
   allowTransientCooldownProbe?: boolean;
 };
@@ -47,15 +44,6 @@ type ModelFallbackRunFn<T> = (
   options?: ModelFallbackRunOptions,
 ) => Promise<T>;
 
-type FallbackAttempt = {
-  provider: string;
-  model: string;
-  error: string;
-  reason?: FailoverReason;
-  status?: number;
-  code?: string;
-};
-
 /**
  * Fallback abort check. Only treats explicit AbortError names as user aborts.
  * Message-based checks (e.g., "aborted") can mask timeouts and skip fallback.
@@ -515,6 +503,7 @@ export async function runWithModelFallback<T>(params: {
   cfg: OpenClawConfig | undefined;
   provider: string;
   model: string;
+  runId?: string;
   agentDir?: string;
   /** Optional explicit fallbacks list; when provided (even empty), replaces agents.defaults.model.fallbacks. */
   fallbacksOverride?: string[];
@@ -537,7 +526,11 @@ export async function runWithModelFallback<T>(params: {
 
   for (let i = 0; i < candidates.length; i += 1) {
     const candidate = candidates[i];
+    const isPrimary = i === 0;
+    const requestedModel =
+      params.provider === candidate.provider && params.model === candidate.model;
     let runOptions: ModelFallbackRunOptions | undefined;
+    let attemptedDuringCooldown = false;
     if (authStore) {
       const profileIds = resolveAuthProfileOrder({
         cfg: params.cfg,
@@ -548,9 +541,6 @@ export async function runWithModelFallback<T>(params: {
 
       if (profileIds.length > 0 && !isAnyProfileAvailable) {
         // All profiles for this provider are in cooldown.
-        const isPrimary = i === 0;
-        const requestedModel =
-          params.provider === candidate.provider && params.model === candidate.model;
         const now = Date.now();
         const probeThrottleKey = resolveProbeThrottleKey(candidate.provider, params.agentDir);
         const decision = resolveCooldownDecision({
@@ -571,6 +561,22 @@ export async function runWithModelFallback<T>(params: {
             error: decision.error,
             reason: decision.reason,
           });
+          logModelFallbackDecision({
+            decision: "skip_candidate",
+            runId: params.runId,
+            requestedProvider: params.provider,
+            requestedModel: params.model,
+            candidate,
+            attempt: i + 1,
+            total: candidates.length,
+            reason: decision.reason,
+            error: decision.error,
+            nextCandidate: candidates[i + 1],
+            isPrimary,
+            requestedModelMatched: requestedModel,
+            fallbackConfigured: hasFallbackCandidates,
+            profileCount: profileIds.length,
+          });
           continue;
         }
 
@@ -584,6 +590,23 @@ export async function runWithModelFallback<T>(params: {
         ) {
           runOptions = { allowTransientCooldownProbe: true };
         }
+        attemptedDuringCooldown = true;
+        logModelFallbackDecision({
+          decision: "probe_cooldown_candidate",
+          runId: params.runId,
+          requestedProvider: params.provider,
+          requestedModel: params.model,
+          candidate,
+          attempt: i + 1,
+          total: candidates.length,
+          reason: decision.reason,
+          nextCandidate: candidates[i + 1],
+          isPrimary,
+          requestedModelMatched: requestedModel,
+          fallbackConfigured: hasFallbackCandidates,
+          allowTransientCooldownProbe: runOptions?.allowTransientCooldownProbe,
+          profileCount: profileIds.length,
+        });
       }
     }
 
@@ -594,6 +617,21 @@ export async function runWithModelFallback<T>(params: {
       options: runOptions,
     });
     if ("success" in attemptRun) {
+      if (i > 0 || attempts.length > 0 || attemptedDuringCooldown) {
+        logModelFallbackDecision({
+          decision: "candidate_succeeded",
+          runId: params.runId,
+          requestedProvider: params.provider,
+          requestedModel: params.model,
+          candidate,
+          attempt: i + 1,
+          total: candidates.length,
+          previousAttempts: attempts,
+          isPrimary,
+          requestedModelMatched: requestedModel,
+          fallbackConfigured: hasFallbackCandidates,
+        });
+      }
       const notFoundAttempt =
         i > 0 ? attempts.find((a) => a.reason === "model_not_found") : undefined;
       if (notFoundAttempt) {
@@ -637,6 +675,23 @@ export async function runWithModelFallback<T>(params: {
         status: described.status,
         code: described.code,
       });
+      logModelFallbackDecision({
+        decision: "candidate_failed",
+        runId: params.runId,
+        requestedProvider: params.provider,
+        requestedModel: params.model,
+        candidate,
+        attempt: i + 1,
+        total: candidates.length,
+        reason: described.reason,
+        status: described.status,
+        code: described.code,
+        error: described.message,
+        nextCandidate: candidates[i + 1],
+        isPrimary,
+        requestedModelMatched: requestedModel,
+        fallbackConfigured: hasFallbackCandidates,
+      });
       await params.onError?.({
         provider: candidate.provider,
         model: candidate.model,
diff --git a/src/agents/model-fallback.types.ts b/src/agents/model-fallback.types.ts
new file mode 100644
index 000000000..92b5f9747
--- /dev/null
+++ b/src/agents/model-fallback.types.ts
@@ -0,0 +1,15 @@
+import type { FailoverReason } from "./pi-embedded-helpers.js";
+
+export type ModelCandidate = {
+  provider: string;
+  model: string;
+};
+
+export type FallbackAttempt = {
+  provider: string;
+  model: string;
+  error: string;
+  reason?: FailoverReason;
+  status?: number;
+  code?: string;
+};
diff --git a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts
index 432ae17da..2d658aada 100644
--- a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts
+++ b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts
@@ -2,8 +2,10 @@ import fs from "node:fs/promises";
 import os from "node:os";
 import path from "node:path";
 import type { AssistantMessage } from "@mariozechner/pi-ai";
-import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
+import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
 import type { OpenClawConfig } from "../config/config.js";
+import { registerLogTransport, resetLogger, setLoggerOverride } from "../logging/logger.js";
+import { redactIdentifier } from "../logging/redact-identifier.js";
 import type { AuthProfileFailureReason } from "./auth-profiles.js";
 import type { EmbeddedRunAttemptResult } from "./pi-embedded-runner/run/types.js";
 
@@ -51,6 +53,7 @@ vi.mock("./models-config.js", async (importOriginal) => {
 });
 
 let runEmbeddedPiAgent: typeof import("./pi-embedded-runner/run.js").runEmbeddedPiAgent;
+let unregisterLogTransport: (() => void) | undefined;
 
 beforeAll(async () => {
   ({ runEmbeddedPiAgent } = await import("./pi-embedded-runner/run.js"));
@@ -64,6 +67,13 @@ beforeEach(() => {
   sleepWithAbortMock.mockClear();
 });
 
+afterEach(() => {
+  unregisterLogTransport?.();
+  unregisterLogTransport = undefined;
+  setLoggerOverride(null);
+  resetLogger();
+});
+
 const baseUsage = {
   input: 0,
   output: 0,
@@ -720,6 +730,61 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
     expect(sleepWithAbortMock).toHaveBeenCalledWith(321, undefined);
   });
 
+  it("logs structured failover decision metadata for overloaded assistant rotation", async () => {
+    const records: Array<Record<string, unknown>> = [];
+    setLoggerOverride({
+      level: "trace",
+      consoleLevel: "silent",
+      file: path.join(os.tmpdir(), `openclaw-auth-rotation-${Date.now()}.log`),
+    });
+    unregisterLogTransport = registerLogTransport((record) => {
+      records.push(record);
+    });
+
+    await runAutoPinnedRotationCase({
+      errorMessage:
+        '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"},"request_id":"req_overload"}',
+      sessionKey: "agent:test:overloaded-logging",
+      runId: "run:overloaded-logging",
+    });
+
+    const decisionRecord = records.find(
+      (record) =>
+        record["2"] === "embedded run failover decision" &&
+        record["1"] &&
+        typeof record["1"] === "object" &&
+        (record["1"] as Record<string, unknown>).decision === "rotate_profile",
+    );
+
+    expect(decisionRecord).toBeDefined();
+    const safeProfileId = redactIdentifier("openai:p1", { len: 12 });
+    expect((decisionRecord as Record<string, unknown>)["1"]).toMatchObject({
+      event: "embedded_run_failover_decision",
+      runId: "run:overloaded-logging",
+      decision: "rotate_profile",
+      failoverReason: "overloaded",
+      profileId: safeProfileId,
+      providerErrorType: "overloaded_error",
+      rawErrorPreview: expect.stringContaining('"request_id":"sha256:'),
+    });
+
+    const stateRecord = records.find(
+      (record) =>
+        record["2"] === "auth profile failure state updated" &&
+        record["1"] &&
+        typeof record["1"] === "object" &&
+        (record["1"] as Record<string, unknown>).profileId === safeProfileId,
+    );
+
+    expect(stateRecord).toBeDefined();
+    expect((stateRecord as Record<string, unknown>)["1"]).toMatchObject({
+      event: "auth_profile_failure_state_updated",
+      runId: "run:overloaded-logging",
+      profileId: safeProfileId,
+      reason: "overloaded",
+    });
+  });
+
   it("rotates for overloaded prompt failures across auto-pinned profiles", async () => {
     const { usageStats } = await runAutoPinnedPromptErrorRotationCase({
       errorMessage: '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}',
diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts
index 68677a009..381c76ada 100644
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -763,6 +763,7 @@ export async function runEmbeddedPiAgent(
           reason,
           cfg: params.config,
           agentDir,
+          runId: params.runId,
         });
       };
       const resolveAuthProfileFailureReason = (
diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts
index 6748e3cbe..a3b31c4cc 100644
--- a/src/auto-reply/reply/agent-runner-execution.ts
+++ b/src/auto-reply/reply/agent-runner-execution.ts
@@ -199,6 +199,7 @@ export async function runAgentTurnWithFallback(params: {
       const onToolResult = params.opts?.onToolResult;
       const fallbackResult = await runWithModelFallback({
         ...resolveModelFallbackOptions(params.followupRun.run),
+        runId,
         run: (provider, model, runOptions) => {
           // Notify that model selection is complete (including after fallback).
           // This allows responsePrefix template interpolation with the actual model.
diff --git a/src/auto-reply/reply/agent-runner-memory.ts b/src/auto-reply/reply/agent-runner-memory.ts
index 374d37d52..643611d35 100644
--- a/src/auto-reply/reply/agent-runner-memory.ts
+++ b/src/auto-reply/reply/agent-runner-memory.ts
@@ -474,6 +474,7 @@ export async function runMemoryFlushIfNeeded(params: {
   try {
     await runWithModelFallback({
       ...resolveModelFallbackOptions(params.followupRun.run),
+      runId: flushRunId,
       run: async (provider, model, runOptions) => {
         const { authProfile, embeddedContext, senderContext } = buildEmbeddedRunContexts({
           run: params.followupRun.run,
diff --git a/src/auto-reply/reply/followup-runner.ts b/src/auto-reply/reply/followup-runner.ts
index 91e781381..8c7eccb5f 100644
--- a/src/auto-reply/reply/followup-runner.ts
+++ b/src/auto-reply/reply/followup-runner.ts
@@ -159,6 +159,7 @@ export function createFollowupRunner(params: {
           cfg: queued.run.config,
           provider: queued.run.provider,
           model: queued.run.model,
+          runId,
           agentDir: queued.run.agentDir,
           fallbacksOverride: resolveRunModelFallbacksOverride({
             cfg: queued.run.config,
diff --git a/src/commands/agent.ts b/src/commands/agent.ts
index 24e62cc89..74a5078d0 100644
--- a/src/commands/agent.ts
+++ b/src/commands/agent.ts
@@ -1103,6 +1103,7 @@ async function agentCommandInternal(
         cfg,
         provider,
         model,
+        runId,
         agentDir,
         fallbacksOverride: effectiveFallbacksOverride,
         run: (providerOverride, modelOverride, runOptions) => {
diff --git a/src/cron/isolated-agent/run.ts b/src/cron/isolated-agent/run.ts
index 5b665b6bf..0666b752e 100644
--- a/src/cron/isolated-agent/run.ts
+++ b/src/cron/isolated-agent/run.ts
@@ -553,6 +553,7 @@ export async function runCronIsolatedAgentTurn(params: {
         cfg: cfgWithAgentDefaults,
         provider,
         model,
+        runId: cronSession.sessionEntry.sessionId,
         agentDir,
         fallbacksOverride:
           payloadFallbacks ?? resolveAgentModelFallbacksOverride(params.cfg, agentId),