fix(daemon): address clanker review findings for kickstart restart
Bug 1 (high): replace fixed sleep 1 with caller-PID polling in both
kickstart and start-after-exit handoff modes. The helper now waits until
kill -0 $caller_pid fails before issuing launchctl kickstart -k.
Bug 2 (medium): gate enable+bootstrap fallback on isLaunchctlNotLoaded().
Only attempt re-registration when kickstart -k fails because the job is
absent; all other kickstart failures now re-throw the original error.
Follows up on 3c0fd3dffe.
Fixes #43311, #43406, #43035, #43049
This commit is contained in:
@@ -65,7 +65,7 @@ describe("runServiceRestart config pre-flight (#35862)", () => {
|
||||
service.restart.mockClear();
|
||||
service.isLoaded.mockResolvedValue(true);
|
||||
service.readCommand.mockResolvedValue({ environment: {} });
|
||||
service.restart.mockResolvedValue(undefined);
|
||||
service.restart.mockResolvedValue({ outcome: "completed" });
|
||||
vi.unstubAllEnvs();
|
||||
vi.stubEnv("OPENCLAW_GATEWAY_TOKEN", "");
|
||||
vi.stubEnv("CLAWDBOT_GATEWAY_TOKEN", "");
|
||||
@@ -163,7 +163,7 @@ describe("runServiceStart config pre-flight (#35862)", () => {
|
||||
service.isLoaded.mockClear();
|
||||
service.restart.mockClear();
|
||||
service.isLoaded.mockResolvedValue(true);
|
||||
service.restart.mockResolvedValue(undefined);
|
||||
service.restart.mockResolvedValue({ outcome: "completed" });
|
||||
});
|
||||
|
||||
it("aborts start when config is invalid", async () => {
|
||||
|
||||
@@ -64,7 +64,7 @@ describe("runServiceRestart token drift", () => {
|
||||
service.readCommand.mockResolvedValue({
|
||||
environment: { OPENCLAW_GATEWAY_TOKEN: "service-token" },
|
||||
});
|
||||
service.restart.mockResolvedValue(undefined);
|
||||
service.restart.mockResolvedValue({ outcome: "completed" });
|
||||
vi.unstubAllEnvs();
|
||||
vi.stubEnv("OPENCLAW_GATEWAY_TOKEN", "");
|
||||
vi.stubEnv("CLAWDBOT_GATEWAY_TOKEN", "");
|
||||
@@ -176,4 +176,24 @@ describe("runServiceRestart token drift", () => {
|
||||
expect(payload.result).toBe("restarted");
|
||||
expect(payload.message).toContain("unmanaged process");
|
||||
});
|
||||
|
||||
it("skips restart health checks when restart is only scheduled", async () => {
|
||||
const postRestartCheck = vi.fn(async () => {});
|
||||
service.restart.mockResolvedValue({ outcome: "scheduled" });
|
||||
|
||||
const result = await runServiceRestart({
|
||||
serviceNoun: "Gateway",
|
||||
service,
|
||||
renderStartHints: () => [],
|
||||
opts: { json: true },
|
||||
postRestartCheck,
|
||||
});
|
||||
|
||||
expect(result).toBe(true);
|
||||
expect(postRestartCheck).not.toHaveBeenCalled();
|
||||
const jsonLine = runtimeLogs.find((line) => line.trim().startsWith("{"));
|
||||
const payload = JSON.parse(jsonLine ?? "{}") as { result?: string; message?: string };
|
||||
expect(payload.result).toBe("scheduled");
|
||||
expect(payload.message).toBe("restart scheduled, gateway will restart momentarily");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -3,6 +3,7 @@ import { readBestEffortConfig, readConfigFileSnapshot } from "../../config/confi
|
||||
import { formatConfigIssueLines } from "../../config/issue-format.js";
|
||||
import { resolveIsNixMode } from "../../config/paths.js";
|
||||
import { checkTokenDrift } from "../../daemon/service-audit.js";
|
||||
import type { GatewayServiceRestartResult } from "../../daemon/service-types.js";
|
||||
import type { GatewayService } from "../../daemon/service.js";
|
||||
import { renderSystemdUnavailableHints } from "../../daemon/systemd-hints.js";
|
||||
import { isSystemdUserServiceAvailable } from "../../daemon/systemd.js";
|
||||
@@ -402,8 +403,23 @@ export async function runServiceRestart(params: {
|
||||
}
|
||||
|
||||
try {
|
||||
let restartResult: GatewayServiceRestartResult = { outcome: "completed" };
|
||||
if (loaded) {
|
||||
await params.service.restart({ env: process.env, stdout });
|
||||
restartResult = await params.service.restart({ env: process.env, stdout });
|
||||
}
|
||||
if (restartResult.outcome === "scheduled") {
|
||||
const message = `restart scheduled, ${params.serviceNoun.toLowerCase()} will restart momentarily`;
|
||||
emit({
|
||||
ok: true,
|
||||
result: "scheduled",
|
||||
message,
|
||||
service: buildDaemonServiceSnapshot(params.service, loaded),
|
||||
warnings: warnings.length ? warnings : undefined,
|
||||
});
|
||||
if (!json) {
|
||||
defaultRuntime.log(message);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if (params.postRestartCheck) {
|
||||
await params.postRestartCheck({ json, stdout, warnings, fail });
|
||||
|
||||
43
src/daemon/launchd-restart-handoff.test.ts
Normal file
43
src/daemon/launchd-restart-handoff.test.ts
Normal file
@@ -0,0 +1,43 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
const spawnMock = vi.hoisted(() => vi.fn());
|
||||
const unrefMock = vi.hoisted(() => vi.fn());
|
||||
|
||||
vi.mock("node:child_process", () => ({
|
||||
spawn: (...args: unknown[]) => spawnMock(...args),
|
||||
}));
|
||||
|
||||
import { scheduleDetachedLaunchdRestartHandoff } from "./launchd-restart-handoff.js";
|
||||
|
||||
afterEach(() => {
|
||||
spawnMock.mockReset();
|
||||
unrefMock.mockReset();
|
||||
spawnMock.mockReturnValue({ pid: 4242, unref: unrefMock });
|
||||
});
|
||||
|
||||
describe("scheduleDetachedLaunchdRestartHandoff", () => {
|
||||
it("waits for the caller pid before kickstarting launchd", () => {
|
||||
const env = {
|
||||
HOME: "/Users/test",
|
||||
OPENCLAW_PROFILE: "default",
|
||||
};
|
||||
spawnMock.mockReturnValue({ pid: 4242, unref: unrefMock });
|
||||
|
||||
const result = scheduleDetachedLaunchdRestartHandoff({
|
||||
env,
|
||||
mode: "kickstart",
|
||||
waitForPid: 9876,
|
||||
});
|
||||
|
||||
expect(result).toEqual({ ok: true, pid: 4242 });
|
||||
expect(spawnMock).toHaveBeenCalledTimes(1);
|
||||
const [, args] = spawnMock.mock.calls[0] as [string, string[]];
|
||||
expect(args[0]).toBe("-c");
|
||||
expect(args[2]).toBe("openclaw-launchd-restart-handoff");
|
||||
expect(args[6]).toBe("9876");
|
||||
expect(args[1]).toContain('while kill -0 "$wait_pid" >/dev/null 2>&1; do');
|
||||
expect(args[1]).toContain('launchctl kickstart -k "$service_target" >/dev/null 2>&1');
|
||||
expect(args[1]).not.toContain("sleep 1");
|
||||
expect(unrefMock).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
@@ -62,11 +62,19 @@ export function isCurrentProcessLaunchdServiceLabel(
|
||||
}
|
||||
|
||||
function buildLaunchdRestartScript(mode: LaunchdRestartHandoffMode): string {
|
||||
const waitForCallerPid = `wait_pid="$4"
|
||||
if [ -n "$wait_pid" ] && [ "$wait_pid" -gt 1 ] 2>/dev/null; then
|
||||
while kill -0 "$wait_pid" >/dev/null 2>&1; do
|
||||
sleep 0.1
|
||||
done
|
||||
fi
|
||||
`;
|
||||
|
||||
if (mode === "kickstart") {
|
||||
return `service_target="$1"
|
||||
domain="$2"
|
||||
plist_path="$3"
|
||||
sleep 1
|
||||
${waitForCallerPid}
|
||||
if ! launchctl kickstart -k "$service_target" >/dev/null 2>&1; then
|
||||
launchctl enable "$service_target" >/dev/null 2>&1
|
||||
if launchctl bootstrap "$domain" "$plist_path" >/dev/null 2>&1; then
|
||||
@@ -79,19 +87,7 @@ fi
|
||||
return `service_target="$1"
|
||||
domain="$2"
|
||||
plist_path="$3"
|
||||
wait_pid="$4"
|
||||
if [ -n "$wait_pid" ] && [ "$wait_pid" -gt 1 ] 2>/dev/null; then
|
||||
attempts=0
|
||||
while kill -0 "$wait_pid" >/dev/null 2>&1; do
|
||||
attempts=$((attempts + 1))
|
||||
if [ "$attempts" -ge 100 ]; then
|
||||
break
|
||||
fi
|
||||
sleep 0.1
|
||||
done
|
||||
else
|
||||
sleep 1
|
||||
fi
|
||||
${waitForCallerPid}
|
||||
if ! launchctl start "$service_target" >/dev/null 2>&1; then
|
||||
launchctl enable "$service_target" >/dev/null 2>&1
|
||||
if launchctl bootstrap "$domain" "$plist_path" >/dev/null 2>&1; then
|
||||
|
||||
@@ -332,7 +332,7 @@ describe("launchd install", () => {
|
||||
|
||||
it("restarts LaunchAgent with kickstart and no bootout", async () => {
|
||||
const env = createDefaultLaunchdEnv();
|
||||
await restartLaunchAgent({
|
||||
const result = await restartLaunchAgent({
|
||||
env,
|
||||
stdout: new PassThrough(),
|
||||
});
|
||||
@@ -340,6 +340,7 @@ describe("launchd install", () => {
|
||||
const domain = typeof process.getuid === "function" ? `gui/${process.getuid()}` : "gui/501";
|
||||
const label = "ai.openclaw.gateway";
|
||||
const serviceId = `${domain}/${label}`;
|
||||
expect(result).toEqual({ outcome: "completed" });
|
||||
expect(state.launchctlCalls).toContainEqual(["kickstart", "-k", serviceId]);
|
||||
expect(state.launchctlCalls.some((call) => call[0] === "bootout")).toBe(false);
|
||||
expect(state.launchctlCalls.some((call) => call[0] === "bootstrap")).toBe(false);
|
||||
@@ -350,7 +351,7 @@ describe("launchd install", () => {
|
||||
state.kickstartError = "Could not find service";
|
||||
state.kickstartFailuresRemaining = 1;
|
||||
|
||||
await restartLaunchAgent({
|
||||
const result = await restartLaunchAgent({
|
||||
env,
|
||||
stdout: new PassThrough(),
|
||||
});
|
||||
@@ -369,24 +370,43 @@ describe("launchd install", () => {
|
||||
(c) => c[0] === "bootstrap" && c[1] === domain && c[2] === plistPath,
|
||||
);
|
||||
|
||||
expect(result).toEqual({ outcome: "completed" });
|
||||
expect(kickstartCalls).toHaveLength(2);
|
||||
expect(enableIndex).toBeGreaterThanOrEqual(0);
|
||||
expect(bootstrapIndex).toBeGreaterThanOrEqual(0);
|
||||
expect(state.launchctlCalls.some((call) => call[0] === "bootout")).toBe(false);
|
||||
});
|
||||
|
||||
it("surfaces the original kickstart failure when the service is still loaded", async () => {
|
||||
const env = createDefaultLaunchdEnv();
|
||||
state.kickstartError = "Input/output error";
|
||||
state.kickstartFailuresRemaining = 1;
|
||||
|
||||
await expect(
|
||||
restartLaunchAgent({
|
||||
env,
|
||||
stdout: new PassThrough(),
|
||||
}),
|
||||
).rejects.toThrow("launchctl kickstart failed: Input/output error");
|
||||
|
||||
expect(state.launchctlCalls.some((call) => call[0] === "enable")).toBe(false);
|
||||
expect(state.launchctlCalls.some((call) => call[0] === "bootstrap")).toBe(false);
|
||||
});
|
||||
|
||||
it("hands restart off to a detached helper when invoked from the current LaunchAgent", async () => {
|
||||
const env = createDefaultLaunchdEnv();
|
||||
launchdRestartHandoffState.isCurrentProcessLaunchdServiceLabel.mockReturnValue(true);
|
||||
|
||||
await restartLaunchAgent({
|
||||
const result = await restartLaunchAgent({
|
||||
env,
|
||||
stdout: new PassThrough(),
|
||||
});
|
||||
|
||||
expect(result).toEqual({ outcome: "scheduled" });
|
||||
expect(launchdRestartHandoffState.scheduleDetachedLaunchdRestartHandoff).toHaveBeenCalledWith({
|
||||
env,
|
||||
mode: "kickstart",
|
||||
waitForPid: process.pid,
|
||||
});
|
||||
expect(state.launchctlCalls).toEqual([]);
|
||||
});
|
||||
|
||||
@@ -27,6 +27,7 @@ import type {
|
||||
GatewayServiceEnvArgs,
|
||||
GatewayServiceInstallArgs,
|
||||
GatewayServiceManageArgs,
|
||||
GatewayServiceRestartResult,
|
||||
} from "./service-types.js";
|
||||
|
||||
const LAUNCH_AGENT_DIR_MODE = 0o755;
|
||||
@@ -447,7 +448,7 @@ export async function installLaunchAgent({
|
||||
export async function restartLaunchAgent({
|
||||
stdout,
|
||||
env,
|
||||
}: GatewayServiceControlArgs): Promise<void> {
|
||||
}: GatewayServiceControlArgs): Promise<GatewayServiceRestartResult> {
|
||||
const serviceEnv = env ?? (process.env as GatewayServiceEnv);
|
||||
const domain = resolveGuiDomain();
|
||||
const label = resolveLaunchAgentLabel({ env: serviceEnv });
|
||||
@@ -461,6 +462,7 @@ export async function restartLaunchAgent({
|
||||
const handoff = scheduleDetachedLaunchdRestartHandoff({
|
||||
env: serviceEnv,
|
||||
mode: "kickstart",
|
||||
waitForPid: process.pid,
|
||||
});
|
||||
if (!handoff.ok) {
|
||||
throw new Error(`launchd restart handoff failed: ${handoff.detail ?? "unknown error"}`);
|
||||
@@ -472,7 +474,7 @@ export async function restartLaunchAgent({
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
return;
|
||||
return { outcome: "scheduled" };
|
||||
}
|
||||
|
||||
const start = await execLaunchctl(["kickstart", "-k", serviceTarget]);
|
||||
@@ -484,7 +486,11 @@ export async function restartLaunchAgent({
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
return;
|
||||
return { outcome: "completed" };
|
||||
}
|
||||
|
||||
if (!isLaunchctlNotLoaded(start)) {
|
||||
throw new Error(`launchctl kickstart failed: ${start.stderr || start.stdout}`.trim());
|
||||
}
|
||||
|
||||
// If the service was previously booted out, re-register the plist and retry.
|
||||
@@ -517,4 +523,5 @@ export async function restartLaunchAgent({
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
return { outcome: "completed" };
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@ import type {
|
||||
GatewayServiceInstallArgs,
|
||||
GatewayServiceManageArgs,
|
||||
GatewayServiceRenderArgs,
|
||||
GatewayServiceRestartResult,
|
||||
} from "./service-types.js";
|
||||
|
||||
function resolveTaskName(env: GatewayServiceEnv): string {
|
||||
@@ -316,7 +317,7 @@ export async function stopScheduledTask({ stdout, env }: GatewayServiceControlAr
|
||||
export async function restartScheduledTask({
|
||||
stdout,
|
||||
env,
|
||||
}: GatewayServiceControlArgs): Promise<void> {
|
||||
}: GatewayServiceControlArgs): Promise<GatewayServiceRestartResult> {
|
||||
await assertSchtasksAvailable();
|
||||
const taskName = resolveTaskName(env ?? (process.env as GatewayServiceEnv));
|
||||
await execSchtasks(["/End", "/TN", taskName]);
|
||||
@@ -325,6 +326,7 @@ export async function restartScheduledTask({
|
||||
throw new Error(`schtasks run failed: ${res.stderr || res.stdout}`.trim());
|
||||
}
|
||||
stdout.write(`${formatLine("Restarted Scheduled Task", taskName)}\n`);
|
||||
return { outcome: "completed" };
|
||||
}
|
||||
|
||||
export async function isScheduledTaskInstalled(args: GatewayServiceEnvArgs): Promise<boolean> {
|
||||
|
||||
@@ -19,6 +19,8 @@ export type GatewayServiceControlArgs = {
|
||||
env?: GatewayServiceEnv;
|
||||
};
|
||||
|
||||
export type GatewayServiceRestartResult = { outcome: "completed" } | { outcome: "scheduled" };
|
||||
|
||||
export type GatewayServiceEnvArgs = {
|
||||
env?: GatewayServiceEnv;
|
||||
};
|
||||
|
||||
@@ -24,6 +24,7 @@ import type {
|
||||
GatewayServiceEnvArgs,
|
||||
GatewayServiceInstallArgs,
|
||||
GatewayServiceManageArgs,
|
||||
GatewayServiceRestartResult,
|
||||
} from "./service-types.js";
|
||||
import {
|
||||
installSystemdService,
|
||||
@@ -41,6 +42,7 @@ export type {
|
||||
GatewayServiceEnvArgs,
|
||||
GatewayServiceInstallArgs,
|
||||
GatewayServiceManageArgs,
|
||||
GatewayServiceRestartResult,
|
||||
} from "./service-types.js";
|
||||
|
||||
function ignoreInstallResult(
|
||||
@@ -58,7 +60,7 @@ export type GatewayService = {
|
||||
install: (args: GatewayServiceInstallArgs) => Promise<void>;
|
||||
uninstall: (args: GatewayServiceManageArgs) => Promise<void>;
|
||||
stop: (args: GatewayServiceControlArgs) => Promise<void>;
|
||||
restart: (args: GatewayServiceControlArgs) => Promise<void>;
|
||||
restart: (args: GatewayServiceControlArgs) => Promise<GatewayServiceRestartResult>;
|
||||
isLoaded: (args: GatewayServiceEnvArgs) => Promise<boolean>;
|
||||
readCommand: (env: GatewayServiceEnv) => Promise<GatewayServiceCommandConfig | null>;
|
||||
readRuntime: (env: GatewayServiceEnv) => Promise<GatewayServiceRuntime>;
|
||||
|
||||
@@ -20,6 +20,7 @@ import type {
|
||||
GatewayServiceEnvArgs,
|
||||
GatewayServiceInstallArgs,
|
||||
GatewayServiceManageArgs,
|
||||
GatewayServiceRestartResult,
|
||||
} from "./service-types.js";
|
||||
import {
|
||||
enableSystemdUserLinger,
|
||||
@@ -570,13 +571,14 @@ export async function stopSystemdService({
|
||||
export async function restartSystemdService({
|
||||
stdout,
|
||||
env,
|
||||
}: GatewayServiceControlArgs): Promise<void> {
|
||||
}: GatewayServiceControlArgs): Promise<GatewayServiceRestartResult> {
|
||||
await runSystemdServiceAction({
|
||||
stdout,
|
||||
env,
|
||||
action: "restart",
|
||||
label: "Restarted systemd service",
|
||||
});
|
||||
return { outcome: "completed" };
|
||||
}
|
||||
|
||||
export async function isSystemdServiceEnabled(args: GatewayServiceEnvArgs): Promise<boolean> {
|
||||
|
||||
Reference in New Issue
Block a user