Files
openclaw/src/infra/archive.ts
2026-02-17 13:36:48 +09:00

438 lines
13 KiB
TypeScript

import { createWriteStream } from "node:fs";
import fs from "node:fs/promises";
import path from "node:path";
import { Readable, Transform } from "node:stream";
import { pipeline } from "node:stream/promises";
import JSZip from "jszip";
import * as tar from "tar";
import { resolveSafeBaseDir } from "./path-safety.js";
export type ArchiveKind = "tar" | "zip";
export type ArchiveLogger = {
info?: (message: string) => void;
warn?: (message: string) => void;
};
export type ArchiveExtractLimits = {
/**
* Max archive file bytes (compressed). Primarily protects zip extraction
* because we currently read the whole archive into memory for parsing.
*/
maxArchiveBytes?: number;
/** Max number of extracted entries (files + dirs). */
maxEntries?: number;
/** Max extracted bytes (sum of all files). */
maxExtractedBytes?: number;
/** Max extracted bytes for a single file entry. */
maxEntryBytes?: number;
};
/** @internal */
export const DEFAULT_MAX_ARCHIVE_BYTES_ZIP = 256 * 1024 * 1024;
/** @internal */
export const DEFAULT_MAX_ENTRIES = 50_000;
/** @internal */
export const DEFAULT_MAX_EXTRACTED_BYTES = 512 * 1024 * 1024;
/** @internal */
export const DEFAULT_MAX_ENTRY_BYTES = 256 * 1024 * 1024;
const ERROR_ARCHIVE_SIZE_EXCEEDS_LIMIT = "archive size exceeds limit";
const ERROR_ARCHIVE_ENTRY_COUNT_EXCEEDS_LIMIT = "archive entry count exceeds limit";
const ERROR_ARCHIVE_ENTRY_EXTRACTED_SIZE_EXCEEDS_LIMIT =
"archive entry extracted size exceeds limit";
const ERROR_ARCHIVE_EXTRACTED_SIZE_EXCEEDS_LIMIT = "archive extracted size exceeds limit";
const TAR_SUFFIXES = [".tgz", ".tar.gz", ".tar"];
export function resolveArchiveKind(filePath: string): ArchiveKind | null {
const lower = filePath.toLowerCase();
if (lower.endsWith(".zip")) {
return "zip";
}
if (TAR_SUFFIXES.some((suffix) => lower.endsWith(suffix))) {
return "tar";
}
return null;
}
export async function resolvePackedRootDir(extractDir: string): Promise<string> {
const direct = path.join(extractDir, "package");
try {
const stat = await fs.stat(direct);
if (stat.isDirectory()) {
return direct;
}
} catch {
// ignore
}
const entries = await fs.readdir(extractDir, { withFileTypes: true });
const dirs = entries.filter((entry) => entry.isDirectory()).map((entry) => entry.name);
if (dirs.length !== 1) {
throw new Error(`unexpected archive layout (dirs: ${dirs.join(", ")})`);
}
const onlyDir = dirs[0];
if (!onlyDir) {
throw new Error("unexpected archive layout (no package dir found)");
}
return path.join(extractDir, onlyDir);
}
export async function withTimeout<T>(
promise: Promise<T>,
timeoutMs: number,
label: string,
): Promise<T> {
let timeoutId: ReturnType<typeof setTimeout> | undefined;
try {
return await Promise.race([
promise,
new Promise<T>((_, reject) => {
timeoutId = setTimeout(
() => reject(new Error(`${label} timed out after ${timeoutMs}ms`)),
timeoutMs,
);
}),
]);
} finally {
if (timeoutId) {
clearTimeout(timeoutId);
}
}
}
// Path hygiene.
function normalizeArchivePath(raw: string): string {
// Archives may contain Windows separators; treat them as separators.
return raw.replaceAll("\\", "/");
}
function isWindowsDrivePath(p: string): boolean {
return /^[a-zA-Z]:[\\/]/.test(p);
}
function validateArchiveEntryPath(entryPath: string): void {
if (!entryPath || entryPath === "." || entryPath === "./") {
return;
}
if (isWindowsDrivePath(entryPath)) {
throw new Error(`archive entry uses a drive path: ${entryPath}`);
}
const normalized = path.posix.normalize(normalizeArchivePath(entryPath));
if (normalized === ".." || normalized.startsWith("../")) {
throw new Error(`archive entry escapes destination: ${entryPath}`);
}
if (path.posix.isAbsolute(normalized) || normalized.startsWith("//")) {
throw new Error(`archive entry is absolute: ${entryPath}`);
}
}
function stripArchivePath(entryPath: string, stripComponents: number): string | null {
const raw = normalizeArchivePath(entryPath);
if (!raw || raw === "." || raw === "./") {
return null;
}
// Important: mimic tar --strip-components semantics (raw segments before
// normalization) so strip-induced escapes like "a/../b" are not hidden.
const parts = raw.split("/").filter((part) => part.length > 0 && part !== ".");
const strip = Math.max(0, Math.floor(stripComponents));
const stripped = strip === 0 ? parts.join("/") : parts.slice(strip).join("/");
const result = path.posix.normalize(stripped);
if (!result || result === "." || result === "./") {
return null;
}
return result;
}
function resolveCheckedOutPath(destDir: string, relPath: string, original: string): string {
const safeBase = resolveSafeBaseDir(destDir);
const outPath = path.resolve(destDir, relPath);
if (!outPath.startsWith(safeBase)) {
throw new Error(`archive entry escapes destination: ${original}`);
}
return outPath;
}
type ResolvedArchiveExtractLimits = Required<ArchiveExtractLimits>;
function clampLimit(value: number | undefined): number | undefined {
if (typeof value !== "number" || !Number.isFinite(value)) {
return undefined;
}
const v = Math.floor(value);
return v > 0 ? v : undefined;
}
function resolveExtractLimits(limits?: ArchiveExtractLimits): ResolvedArchiveExtractLimits {
// Defaults: defensive, but should not break normal installs.
return {
maxArchiveBytes: clampLimit(limits?.maxArchiveBytes) ?? DEFAULT_MAX_ARCHIVE_BYTES_ZIP,
maxEntries: clampLimit(limits?.maxEntries) ?? DEFAULT_MAX_ENTRIES,
maxExtractedBytes: clampLimit(limits?.maxExtractedBytes) ?? DEFAULT_MAX_EXTRACTED_BYTES,
maxEntryBytes: clampLimit(limits?.maxEntryBytes) ?? DEFAULT_MAX_ENTRY_BYTES,
};
}
function assertArchiveEntryCountWithinLimit(
entryCount: number,
limits: ResolvedArchiveExtractLimits,
) {
if (entryCount > limits.maxEntries) {
throw new Error(ERROR_ARCHIVE_ENTRY_COUNT_EXCEEDS_LIMIT);
}
}
function createByteBudgetTracker(limits: ResolvedArchiveExtractLimits): {
startEntry: () => void;
addBytes: (bytes: number) => void;
addEntrySize: (size: number) => void;
} {
let entryBytes = 0;
let extractedBytes = 0;
const addBytes = (bytes: number) => {
const b = Math.max(0, Math.floor(bytes));
if (b === 0) {
return;
}
entryBytes += b;
if (entryBytes > limits.maxEntryBytes) {
throw new Error(ERROR_ARCHIVE_ENTRY_EXTRACTED_SIZE_EXCEEDS_LIMIT);
}
extractedBytes += b;
if (extractedBytes > limits.maxExtractedBytes) {
throw new Error(ERROR_ARCHIVE_EXTRACTED_SIZE_EXCEEDS_LIMIT);
}
};
return {
startEntry() {
entryBytes = 0;
},
addBytes,
addEntrySize(size: number) {
const s = Math.max(0, Math.floor(size));
if (s > limits.maxEntryBytes) {
throw new Error(ERROR_ARCHIVE_ENTRY_EXTRACTED_SIZE_EXCEEDS_LIMIT);
}
// Note: tar budgets are based on the header-declared size.
addBytes(s);
},
};
}
function createExtractBudgetTransform(params: {
onChunkBytes: (bytes: number) => void;
}): Transform {
return new Transform({
transform(chunk, _encoding, callback) {
try {
const buf = chunk instanceof Buffer ? chunk : Buffer.from(chunk as Uint8Array);
params.onChunkBytes(buf.byteLength);
callback(null, buf);
} catch (err) {
callback(err instanceof Error ? err : new Error(String(err)));
}
},
});
}
type ZipEntry = {
name: string;
dir: boolean;
unixPermissions?: number;
nodeStream?: () => NodeJS.ReadableStream;
async: (type: "nodebuffer") => Promise<Buffer>;
};
async function readZipEntryStream(entry: ZipEntry): Promise<NodeJS.ReadableStream> {
if (typeof entry.nodeStream === "function") {
return entry.nodeStream();
}
// Old JSZip: fall back to buffering, but still extract via a stream.
const buf = await entry.async("nodebuffer");
return Readable.from(buf);
}
async function extractZip(params: {
archivePath: string;
destDir: string;
stripComponents?: number;
limits?: ArchiveExtractLimits;
}): Promise<void> {
const limits = resolveExtractLimits(params.limits);
const stat = await fs.stat(params.archivePath);
if (stat.size > limits.maxArchiveBytes) {
throw new Error(ERROR_ARCHIVE_SIZE_EXCEEDS_LIMIT);
}
const buffer = await fs.readFile(params.archivePath);
const zip = await JSZip.loadAsync(buffer);
const entries = Object.values(zip.files) as ZipEntry[];
const strip = Math.max(0, Math.floor(params.stripComponents ?? 0));
assertArchiveEntryCountWithinLimit(entries.length, limits);
const budget = createByteBudgetTracker(limits);
for (const entry of entries) {
validateArchiveEntryPath(entry.name);
const relPath = stripArchivePath(entry.name, strip);
if (!relPath) {
continue;
}
validateArchiveEntryPath(relPath);
const outPath = resolveCheckedOutPath(params.destDir, relPath, entry.name);
if (entry.dir) {
await fs.mkdir(outPath, { recursive: true });
continue;
}
await fs.mkdir(path.dirname(outPath), { recursive: true });
budget.startEntry();
const readable = await readZipEntryStream(entry);
try {
await pipeline(
readable,
createExtractBudgetTransform({ onChunkBytes: budget.addBytes }),
createWriteStream(outPath),
);
} catch (err) {
await fs.unlink(outPath).catch(() => undefined);
throw err;
}
// Best-effort permission restore for zip entries created on unix.
if (typeof entry.unixPermissions === "number") {
const mode = entry.unixPermissions & 0o777;
if (mode !== 0) {
await fs.chmod(outPath, mode).catch(() => undefined);
}
}
}
}
type TarEntryInfo = { path: string; type: string; size: number };
function readTarEntryInfo(entry: unknown): TarEntryInfo {
const p =
typeof entry === "object" && entry !== null && "path" in entry
? String((entry as { path: unknown }).path)
: "";
const t =
typeof entry === "object" && entry !== null && "type" in entry
? String((entry as { type: unknown }).type)
: "";
const s =
typeof entry === "object" &&
entry !== null &&
"size" in entry &&
typeof (entry as { size?: unknown }).size === "number" &&
Number.isFinite((entry as { size: number }).size)
? Math.max(0, Math.floor((entry as { size: number }).size))
: 0;
return { path: p, type: t, size: s };
}
export async function extractArchive(params: {
archivePath: string;
destDir: string;
timeoutMs: number;
kind?: ArchiveKind;
stripComponents?: number;
tarGzip?: boolean;
limits?: ArchiveExtractLimits;
logger?: ArchiveLogger;
}): Promise<void> {
const kind = params.kind ?? resolveArchiveKind(params.archivePath);
if (!kind) {
throw new Error(`unsupported archive: ${params.archivePath}`);
}
const label = kind === "zip" ? "extract zip" : "extract tar";
if (kind === "tar") {
const strip = Math.max(0, Math.floor(params.stripComponents ?? 0));
const limits = resolveExtractLimits(params.limits);
let entryCount = 0;
const budget = createByteBudgetTracker(limits);
await withTimeout(
tar.x({
file: params.archivePath,
cwd: params.destDir,
strip,
gzip: params.tarGzip,
preservePaths: false,
strict: true,
onReadEntry(entry) {
const info = readTarEntryInfo(entry);
try {
validateArchiveEntryPath(info.path);
const relPath = stripArchivePath(info.path, strip);
if (!relPath) {
return;
}
validateArchiveEntryPath(relPath);
resolveCheckedOutPath(params.destDir, relPath, info.path);
if (
info.type === "SymbolicLink" ||
info.type === "Link" ||
info.type === "BlockDevice" ||
info.type === "CharacterDevice" ||
info.type === "FIFO" ||
info.type === "Socket"
) {
throw new Error(`tar entry is a link: ${info.path}`);
}
entryCount += 1;
assertArchiveEntryCountWithinLimit(entryCount, limits);
budget.addEntrySize(info.size);
} catch (err) {
const error = err instanceof Error ? err : new Error(String(err));
// Node's EventEmitter calls listeners with `this` bound to the
// emitter (tar.Unpack), which exposes Parser.abort().
const emitter = this as unknown as { abort?: (error: Error) => void };
emitter.abort?.(error);
}
},
}),
params.timeoutMs,
label,
);
return;
}
await withTimeout(
extractZip({
archivePath: params.archivePath,
destDir: params.destDir,
stripComponents: params.stripComponents,
limits: params.limits,
}),
params.timeoutMs,
label,
);
}
export async function fileExists(filePath: string): Promise<boolean> {
try {
await fs.stat(filePath);
return true;
} catch {
return false;
}
}
export async function readJsonFile<T>(filePath: string): Promise<T> {
const raw = await fs.readFile(filePath, "utf-8");
return JSON.parse(raw) as T;
}