fix(daemon): address clanker review findings for kickstart restart

Bug 1 (high): replace fixed sleep 1 with caller-PID polling in both
kickstart and start-after-exit handoff modes. The helper now waits until
kill -0 $caller_pid fails before issuing launchctl kickstart -k.

Bug 2 (medium): gate enable+bootstrap fallback on isLaunchctlNotLoaded().
Only attempt re-registration when kickstart -k fails because the job is
absent; all other kickstart failures now re-throw the original error.

Follows up on 3c0fd3dffe.
Fixes #43311, #43406, #43035, #43049
This commit is contained in:
Robin Waslander
2026-03-12 02:16:24 +01:00
committed by GitHub
parent b7a37c2023
commit 841ee24340
11 changed files with 137 additions and 27 deletions

View File

@@ -65,7 +65,7 @@ describe("runServiceRestart config pre-flight (#35862)", () => {
service.restart.mockClear();
service.isLoaded.mockResolvedValue(true);
service.readCommand.mockResolvedValue({ environment: {} });
service.restart.mockResolvedValue(undefined);
service.restart.mockResolvedValue({ outcome: "completed" });
vi.unstubAllEnvs();
vi.stubEnv("OPENCLAW_GATEWAY_TOKEN", "");
vi.stubEnv("CLAWDBOT_GATEWAY_TOKEN", "");
@@ -163,7 +163,7 @@ describe("runServiceStart config pre-flight (#35862)", () => {
service.isLoaded.mockClear();
service.restart.mockClear();
service.isLoaded.mockResolvedValue(true);
service.restart.mockResolvedValue(undefined);
service.restart.mockResolvedValue({ outcome: "completed" });
});
it("aborts start when config is invalid", async () => {

View File

@@ -64,7 +64,7 @@ describe("runServiceRestart token drift", () => {
service.readCommand.mockResolvedValue({
environment: { OPENCLAW_GATEWAY_TOKEN: "service-token" },
});
service.restart.mockResolvedValue(undefined);
service.restart.mockResolvedValue({ outcome: "completed" });
vi.unstubAllEnvs();
vi.stubEnv("OPENCLAW_GATEWAY_TOKEN", "");
vi.stubEnv("CLAWDBOT_GATEWAY_TOKEN", "");
@@ -176,4 +176,24 @@ describe("runServiceRestart token drift", () => {
expect(payload.result).toBe("restarted");
expect(payload.message).toContain("unmanaged process");
});
// Covers the "scheduled" restart outcome: when service.restart resolves with
// { outcome: "scheduled" }, runServiceRestart is expected to return true, skip
// the postRestartCheck callback, and emit a JSON payload whose result is
// "scheduled" instead of "restarted".
it("skips restart health checks when restart is only scheduled", async () => {
const postRestartCheck = vi.fn(async () => {});
// Override the default mocked restart result for this test only.
service.restart.mockResolvedValue({ outcome: "scheduled" });
const result = await runServiceRestart({
serviceNoun: "Gateway",
service,
renderStartHints: () => [],
opts: { json: true },
postRestartCheck,
});
expect(result).toBe(true);
// The post-restart check must not run when the restart has only been scheduled.
expect(postRestartCheck).not.toHaveBeenCalled();
// NOTE(review): runtimeLogs is defined outside this hunk — presumably the captured
// runtime log lines; the first line starting with "{" is treated as the JSON payload.
const jsonLine = runtimeLogs.find((line) => line.trim().startsWith("{"));
const payload = JSON.parse(jsonLine ?? "{}") as { result?: string; message?: string };
expect(payload.result).toBe("scheduled");
// The message embeds the lower-cased serviceNoun ("Gateway" -> "gateway").
expect(payload.message).toBe("restart scheduled, gateway will restart momentarily");
});
});

View File

@@ -3,6 +3,7 @@ import { readBestEffortConfig, readConfigFileSnapshot } from "../../config/confi
import { formatConfigIssueLines } from "../../config/issue-format.js";
import { resolveIsNixMode } from "../../config/paths.js";
import { checkTokenDrift } from "../../daemon/service-audit.js";
import type { GatewayServiceRestartResult } from "../../daemon/service-types.js";
import type { GatewayService } from "../../daemon/service.js";
import { renderSystemdUnavailableHints } from "../../daemon/systemd-hints.js";
import { isSystemdUserServiceAvailable } from "../../daemon/systemd.js";
@@ -402,8 +403,23 @@ export async function runServiceRestart(params: {
}
try {
let restartResult: GatewayServiceRestartResult = { outcome: "completed" };
if (loaded) {
await params.service.restart({ env: process.env, stdout });
restartResult = await params.service.restart({ env: process.env, stdout });
}
if (restartResult.outcome === "scheduled") {
const message = `restart scheduled, ${params.serviceNoun.toLowerCase()} will restart momentarily`;
emit({
ok: true,
result: "scheduled",
message,
service: buildDaemonServiceSnapshot(params.service, loaded),
warnings: warnings.length ? warnings : undefined,
});
if (!json) {
defaultRuntime.log(message);
}
return true;
}
if (params.postRestartCheck) {
await params.postRestartCheck({ json, stdout, warnings, fail });

View File

@@ -0,0 +1,43 @@
// Unit test for scheduleDetachedLaunchdRestartHandoff: checks the arguments handed
// to the (mocked) child_process.spawn without launching a real process.
import { afterEach, describe, expect, it, vi } from "vitest";
// Created with vi.hoisted so the mocks already exist when the vi.mock factory below runs.
const spawnMock = vi.hoisted(() => vi.fn());
const unrefMock = vi.hoisted(() => vi.fn());
// Replace node:child_process.spawn with a thin forwarder to spawnMock.
vi.mock("node:child_process", () => ({
spawn: (...args: unknown[]) => spawnMock(...args),
}));
// Imported after the vi.mock call so the module under test sees the mocked spawn.
import { scheduleDetachedLaunchdRestartHandoff } from "./launchd-restart-handoff.js";
// Clear recorded calls after every test, then restore the default fake child
// ({ pid: 4242, unref }) for whichever test runs next.
afterEach(() => {
spawnMock.mockReset();
unrefMock.mockReset();
spawnMock.mockReturnValue({ pid: 4242, unref: unrefMock });
});
describe("scheduleDetachedLaunchdRestartHandoff", () => {
it("waits for the caller pid before kickstarting launchd", () => {
const env = {
HOME: "/Users/test",
OPENCLAW_PROFILE: "default",
};
// afterEach has not run before the first test, so the fake child is set explicitly here.
spawnMock.mockReturnValue({ pid: 4242, unref: unrefMock });
const result = scheduleDetachedLaunchdRestartHandoff({
env,
mode: "kickstart",
waitForPid: 9876,
});
// The reported pid is the pid of the fake child returned by spawnMock.
expect(result).toEqual({ ok: true, pid: 4242 });
expect(spawnMock).toHaveBeenCalledTimes(1);
// Second spawn argument is the argv array; args[1] is inspected below as the script text.
const [, args] = spawnMock.mock.calls[0] as [string, string[]];
expect(args[0]).toBe("-c");
expect(args[2]).toBe("openclaw-launchd-restart-handoff");
// waitForPid is passed as a string argument.
// NOTE(review): presumably this is the script's "$4" (wait_pid) — confirm against the script builder.
expect(args[6]).toBe("9876");
// The script must poll the caller pid with kill -0 and then kickstart the service...
expect(args[1]).toContain('while kill -0 "$wait_pid" >/dev/null 2>&1; do');
expect(args[1]).toContain('launchctl kickstart -k "$service_target" >/dev/null 2>&1');
// ...and must no longer contain the fixed "sleep 1" delay.
expect(args[1]).not.toContain("sleep 1");
// The spawned child is unref'd exactly once.
expect(unrefMock).toHaveBeenCalledTimes(1);
});
});

View File

@@ -62,11 +62,19 @@ export function isCurrentProcessLaunchdServiceLabel(
}
function buildLaunchdRestartScript(mode: LaunchdRestartHandoffMode): string {
const waitForCallerPid = `wait_pid="$4"
if [ -n "$wait_pid" ] && [ "$wait_pid" -gt 1 ] 2>/dev/null; then
while kill -0 "$wait_pid" >/dev/null 2>&1; do
sleep 0.1
done
fi
`;
if (mode === "kickstart") {
return `service_target="$1"
domain="$2"
plist_path="$3"
sleep 1
${waitForCallerPid}
if ! launchctl kickstart -k "$service_target" >/dev/null 2>&1; then
launchctl enable "$service_target" >/dev/null 2>&1
if launchctl bootstrap "$domain" "$plist_path" >/dev/null 2>&1; then
@@ -79,19 +87,7 @@ fi
return `service_target="$1"
domain="$2"
plist_path="$3"
wait_pid="$4"
if [ -n "$wait_pid" ] && [ "$wait_pid" -gt 1 ] 2>/dev/null; then
attempts=0
while kill -0 "$wait_pid" >/dev/null 2>&1; do
attempts=$((attempts + 1))
if [ "$attempts" -ge 100 ]; then
break
fi
sleep 0.1
done
else
sleep 1
fi
${waitForCallerPid}
if ! launchctl start "$service_target" >/dev/null 2>&1; then
launchctl enable "$service_target" >/dev/null 2>&1
if launchctl bootstrap "$domain" "$plist_path" >/dev/null 2>&1; then

View File

@@ -332,7 +332,7 @@ describe("launchd install", () => {
it("restarts LaunchAgent with kickstart and no bootout", async () => {
const env = createDefaultLaunchdEnv();
await restartLaunchAgent({
const result = await restartLaunchAgent({
env,
stdout: new PassThrough(),
});
@@ -340,6 +340,7 @@ describe("launchd install", () => {
const domain = typeof process.getuid === "function" ? `gui/${process.getuid()}` : "gui/501";
const label = "ai.openclaw.gateway";
const serviceId = `${domain}/${label}`;
expect(result).toEqual({ outcome: "completed" });
expect(state.launchctlCalls).toContainEqual(["kickstart", "-k", serviceId]);
expect(state.launchctlCalls.some((call) => call[0] === "bootout")).toBe(false);
expect(state.launchctlCalls.some((call) => call[0] === "bootstrap")).toBe(false);
@@ -350,7 +351,7 @@ describe("launchd install", () => {
state.kickstartError = "Could not find service";
state.kickstartFailuresRemaining = 1;
await restartLaunchAgent({
const result = await restartLaunchAgent({
env,
stdout: new PassThrough(),
});
@@ -369,24 +370,43 @@ describe("launchd install", () => {
(c) => c[0] === "bootstrap" && c[1] === domain && c[2] === plistPath,
);
expect(result).toEqual({ outcome: "completed" });
expect(kickstartCalls).toHaveLength(2);
expect(enableIndex).toBeGreaterThanOrEqual(0);
expect(bootstrapIndex).toBeGreaterThanOrEqual(0);
expect(state.launchctlCalls.some((call) => call[0] === "bootout")).toBe(false);
});
// Covers a kickstart failure whose error text is "Input/output error":
// restartLaunchAgent is expected to reject with the original kickstart error and
// must not fall back to launchctl enable/bootstrap.
it("surfaces the original kickstart failure when the service is still loaded", async () => {
const env = createDefaultLaunchdEnv();
// NOTE(review): state is the test harness defined outside this hunk — presumably these
// fields make the next mocked kickstart call fail once with the given message.
state.kickstartError = "Input/output error";
state.kickstartFailuresRemaining = 1;
await expect(
restartLaunchAgent({
env,
stdout: new PassThrough(),
}),
).rejects.toThrow("launchctl kickstart failed: Input/output error");
// No re-registration attempt may have been recorded for this failure.
expect(state.launchctlCalls.some((call) => call[0] === "enable")).toBe(false);
expect(state.launchctlCalls.some((call) => call[0] === "bootstrap")).toBe(false);
});
it("hands restart off to a detached helper when invoked from the current LaunchAgent", async () => {
const env = createDefaultLaunchdEnv();
launchdRestartHandoffState.isCurrentProcessLaunchdServiceLabel.mockReturnValue(true);
await restartLaunchAgent({
const result = await restartLaunchAgent({
env,
stdout: new PassThrough(),
});
expect(result).toEqual({ outcome: "scheduled" });
expect(launchdRestartHandoffState.scheduleDetachedLaunchdRestartHandoff).toHaveBeenCalledWith({
env,
mode: "kickstart",
waitForPid: process.pid,
});
expect(state.launchctlCalls).toEqual([]);
});

View File

@@ -27,6 +27,7 @@ import type {
GatewayServiceEnvArgs,
GatewayServiceInstallArgs,
GatewayServiceManageArgs,
GatewayServiceRestartResult,
} from "./service-types.js";
const LAUNCH_AGENT_DIR_MODE = 0o755;
@@ -447,7 +448,7 @@ export async function installLaunchAgent({
export async function restartLaunchAgent({
stdout,
env,
}: GatewayServiceControlArgs): Promise<void> {
}: GatewayServiceControlArgs): Promise<GatewayServiceRestartResult> {
const serviceEnv = env ?? (process.env as GatewayServiceEnv);
const domain = resolveGuiDomain();
const label = resolveLaunchAgentLabel({ env: serviceEnv });
@@ -461,6 +462,7 @@ export async function restartLaunchAgent({
const handoff = scheduleDetachedLaunchdRestartHandoff({
env: serviceEnv,
mode: "kickstart",
waitForPid: process.pid,
});
if (!handoff.ok) {
throw new Error(`launchd restart handoff failed: ${handoff.detail ?? "unknown error"}`);
@@ -472,7 +474,7 @@ export async function restartLaunchAgent({
throw err;
}
}
return;
return { outcome: "scheduled" };
}
const start = await execLaunchctl(["kickstart", "-k", serviceTarget]);
@@ -484,7 +486,11 @@ export async function restartLaunchAgent({
throw err;
}
}
return;
return { outcome: "completed" };
}
if (!isLaunchctlNotLoaded(start)) {
throw new Error(`launchctl kickstart failed: ${start.stderr || start.stdout}`.trim());
}
// If the service was previously booted out, re-register the plist and retry.
@@ -517,4 +523,5 @@ export async function restartLaunchAgent({
throw err;
}
}
return { outcome: "completed" };
}

View File

@@ -16,6 +16,7 @@ import type {
GatewayServiceInstallArgs,
GatewayServiceManageArgs,
GatewayServiceRenderArgs,
GatewayServiceRestartResult,
} from "./service-types.js";
function resolveTaskName(env: GatewayServiceEnv): string {
@@ -316,7 +317,7 @@ export async function stopScheduledTask({ stdout, env }: GatewayServiceControlAr
export async function restartScheduledTask({
stdout,
env,
}: GatewayServiceControlArgs): Promise<void> {
}: GatewayServiceControlArgs): Promise<GatewayServiceRestartResult> {
await assertSchtasksAvailable();
const taskName = resolveTaskName(env ?? (process.env as GatewayServiceEnv));
await execSchtasks(["/End", "/TN", taskName]);
@@ -325,6 +326,7 @@ export async function restartScheduledTask({
throw new Error(`schtasks run failed: ${res.stderr || res.stdout}`.trim());
}
stdout.write(`${formatLine("Restarted Scheduled Task", taskName)}\n`);
return { outcome: "completed" };
}
export async function isScheduledTaskInstalled(args: GatewayServiceEnvArgs): Promise<boolean> {

View File

@@ -19,6 +19,8 @@ export type GatewayServiceControlArgs = {
env?: GatewayServiceEnv;
};
/**
 * Result of a gateway service restart request, discriminated by `outcome`:
 * - `"completed"`: the restart command was issued before the call returned.
 * - `"scheduled"`: the restart was handed off and has not happened yet.
 */
export type GatewayServiceRestartResult = { outcome: "completed" } | { outcome: "scheduled" };
export type GatewayServiceEnvArgs = {
env?: GatewayServiceEnv;
};

View File

@@ -24,6 +24,7 @@ import type {
GatewayServiceEnvArgs,
GatewayServiceInstallArgs,
GatewayServiceManageArgs,
GatewayServiceRestartResult,
} from "./service-types.js";
import {
installSystemdService,
@@ -41,6 +42,7 @@ export type {
GatewayServiceEnvArgs,
GatewayServiceInstallArgs,
GatewayServiceManageArgs,
GatewayServiceRestartResult,
} from "./service-types.js";
function ignoreInstallResult(
@@ -58,7 +60,7 @@ export type GatewayService = {
install: (args: GatewayServiceInstallArgs) => Promise<void>;
uninstall: (args: GatewayServiceManageArgs) => Promise<void>;
stop: (args: GatewayServiceControlArgs) => Promise<void>;
restart: (args: GatewayServiceControlArgs) => Promise<void>;
restart: (args: GatewayServiceControlArgs) => Promise<GatewayServiceRestartResult>;
isLoaded: (args: GatewayServiceEnvArgs) => Promise<boolean>;
readCommand: (env: GatewayServiceEnv) => Promise<GatewayServiceCommandConfig | null>;
readRuntime: (env: GatewayServiceEnv) => Promise<GatewayServiceRuntime>;

View File

@@ -20,6 +20,7 @@ import type {
GatewayServiceEnvArgs,
GatewayServiceInstallArgs,
GatewayServiceManageArgs,
GatewayServiceRestartResult,
} from "./service-types.js";
import {
enableSystemdUserLinger,
@@ -570,13 +571,14 @@ export async function stopSystemdService({
export async function restartSystemdService({
stdout,
env,
}: GatewayServiceControlArgs): Promise<void> {
}: GatewayServiceControlArgs): Promise<GatewayServiceRestartResult> {
await runSystemdServiceAction({
stdout,
env,
action: "restart",
label: "Restarted systemd service",
});
return { outcome: "completed" };
}
export async function isSystemdServiceEnabled(args: GatewayServiceEnvArgs): Promise<boolean> {