import { createWriteStream } from "node:fs";
import fs from "node:fs/promises";
import path from "node:path";
import { Readable, Transform } from "node:stream";
import { pipeline } from "node:stream/promises";
import JSZip from "jszip";
import * as tar from "tar";
import { resolveSafeBaseDir } from "./path-safety.js";

export type ArchiveKind = "tar" | "zip";

export type ArchiveLogger = {
  info?: (message: string) => void;
  warn?: (message: string) => void;
};

export type ArchiveExtractLimits = {
  /**
   * Max archive file bytes (compressed). Primarily protects zip extraction
   * because we currently read the whole archive into memory for parsing.
   */
  maxArchiveBytes?: number;
  /** Max number of extracted entries (files + dirs). */
  maxEntries?: number;
  /** Max extracted bytes (sum of all files). */
  maxExtractedBytes?: number;
  /** Max extracted bytes for a single file entry. */
  maxEntryBytes?: number;
};

/** @internal */
export const DEFAULT_MAX_ARCHIVE_BYTES_ZIP = 256 * 1024 * 1024;
/** @internal */
export const DEFAULT_MAX_ENTRIES = 50_000;
/** @internal */
export const DEFAULT_MAX_EXTRACTED_BYTES = 512 * 1024 * 1024;
/** @internal */
export const DEFAULT_MAX_ENTRY_BYTES = 256 * 1024 * 1024;

const ERROR_ARCHIVE_SIZE_EXCEEDS_LIMIT = "archive size exceeds limit";
const ERROR_ARCHIVE_ENTRY_COUNT_EXCEEDS_LIMIT = "archive entry count exceeds limit";
const ERROR_ARCHIVE_ENTRY_EXTRACTED_SIZE_EXCEEDS_LIMIT =
  "archive entry extracted size exceeds limit";
const ERROR_ARCHIVE_EXTRACTED_SIZE_EXCEEDS_LIMIT = "archive extracted size exceeds limit";

const TAR_SUFFIXES = [".tgz", ".tar.gz", ".tar"];

/** Infer the archive format from the file name; returns null when unsupported. */
export function resolveArchiveKind(filePath: string): ArchiveKind | null {
  const lower = filePath.toLowerCase();
  if (lower.endsWith(".zip")) {
    return "zip";
  }
  if (TAR_SUFFIXES.some((suffix) => lower.endsWith(suffix))) {
    return "tar";
  }
  return null;
}
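
// Illustrative results (the check is purely name-based; file contents are
// never inspected):
//   resolveArchiveKind("pkg-1.2.3.tgz") // => "tar"
//   resolveArchiveKind("theme.ZIP")     // => "zip" (suffix match is case-insensitive)
//   resolveArchiveKind("notes.txt")     // => null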

/**
 * Resolve the root directory inside an extracted npm-pack-style archive:
 * prefer `package/`, otherwise accept exactly one top-level directory.
 */
export async function resolvePackedRootDir(extractDir: string): Promise<string> {
  const direct = path.join(extractDir, "package");
  try {
    const stat = await fs.stat(direct);
    if (stat.isDirectory()) {
      return direct;
    }
  } catch {
    // ignore
  }

  const entries = await fs.readdir(extractDir, { withFileTypes: true });
  const dirs = entries.filter((entry) => entry.isDirectory()).map((entry) => entry.name);
  if (dirs.length !== 1) {
    throw new Error(`unexpected archive layout (dirs: ${dirs.join(", ")})`);
  }
  const onlyDir = dirs[0];
  if (!onlyDir) {
    throw new Error("unexpected archive layout (no package dir found)");
  }
  return path.join(extractDir, onlyDir);
}
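
// Illustrative layouts (the extraction dir /tmp/x is hypothetical):
//   /tmp/x/package/...               => resolves to /tmp/x/package
//   /tmp/x/pkg-1.0.0/... (sole dir)  => resolves to /tmp/x/pkg-1.0.0
//   /tmp/x/a/ and /tmp/x/b/          => throws (ambiguous layout)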

/** Race `promise` against a timer; the timer is always cleared in `finally`. */
export async function withTimeout<T>(
  promise: Promise<T>,
  timeoutMs: number,
  label: string,
): Promise<T> {
  let timeoutId: ReturnType<typeof setTimeout> | undefined;
  try {
    return await Promise.race([
      promise,
      new Promise<T>((_, reject) => {
        timeoutId = setTimeout(
          () => reject(new Error(`${label} timed out after ${timeoutMs}ms`)),
          timeoutMs,
        );
      }),
    ]);
  } finally {
    if (timeoutId) {
      clearTimeout(timeoutId);
    }
  }
}
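
// Illustrative usage (`slowWork` is a hypothetical async function):
//   const result = await withTimeout(slowWork(), 5_000, "slow work");
// Caveat: losing the race rejects this call but does not cancel the wrapped
// promise; the underlying work keeps running in the background.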

// Path hygiene.
function normalizeArchivePath(raw: string): string {
  // Archives may contain Windows separators; treat them as separators.
  return raw.replaceAll("\\", "/");
}

function isWindowsDrivePath(p: string): boolean {
  return /^[a-zA-Z]:[\\/]/.test(p);
}

function validateArchiveEntryPath(entryPath: string): void {
  if (!entryPath || entryPath === "." || entryPath === "./") {
    return;
  }
  if (isWindowsDrivePath(entryPath)) {
    throw new Error(`archive entry uses a drive path: ${entryPath}`);
  }
  const normalized = path.posix.normalize(normalizeArchivePath(entryPath));
  if (normalized === ".." || normalized.startsWith("../")) {
    throw new Error(`archive entry escapes destination: ${entryPath}`);
  }
  if (path.posix.isAbsolute(normalized) || normalized.startsWith("//")) {
    throw new Error(`archive entry is absolute: ${entryPath}`);
  }
}
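
// Illustrative outcomes:
//   validateArchiveEntryPath("dir/file.txt") // ok
//   validateArchiveEntryPath("C:\\evil.txt") // throws (drive path)
//   validateArchiveEntryPath("/etc/passwd")  // throws (absolute)
//   validateArchiveEntryPath("../outside")   // throws (escapes destination)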

function stripArchivePath(entryPath: string, stripComponents: number): string | null {
  const raw = normalizeArchivePath(entryPath);
  if (!raw || raw === "." || raw === "./") {
    return null;
  }

  // Important: mimic tar --strip-components semantics (raw segments before
  // normalization) so strip-induced escapes like "a/../b" are not hidden.
  const parts = raw.split("/").filter((part) => part.length > 0 && part !== ".");
  const strip = Math.max(0, Math.floor(stripComponents));
  const stripped = strip === 0 ? parts.join("/") : parts.slice(strip).join("/");
  const result = path.posix.normalize(stripped);
  if (!result || result === "." || result === "./") {
    return null;
  }
  return result;
}
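
// Illustrative behavior, derived from the implementation above:
//   stripArchivePath("package/dist/index.js", 1) // => "dist/index.js"
//   stripArchivePath("package", 1)               // => null (fully consumed)
//   stripArchivePath("a/../../x", 1)             // => "../../x", which the
//     follow-up validateArchiveEntryPath(relPath) call then rejects.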

function resolveCheckedOutPath(destDir: string, relPath: string, original: string): string {
  // Assumes resolveSafeBaseDir returns a canonical prefix (e.g. ending in a
  // path separator) so the startsWith check cannot match sibling directories.
  const safeBase = resolveSafeBaseDir(destDir);
  const outPath = path.resolve(destDir, relPath);
  if (!outPath.startsWith(safeBase)) {
    throw new Error(`archive entry escapes destination: ${original}`);
  }
  return outPath;
}

type ResolvedArchiveExtractLimits = Required<ArchiveExtractLimits>;

/** Coerce to a positive integer; anything else means "fall back to the default". */
function clampLimit(value: number | undefined): number | undefined {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  const v = Math.floor(value);
  return v > 0 ? v : undefined;
}

function resolveExtractLimits(limits?: ArchiveExtractLimits): ResolvedArchiveExtractLimits {
  // Defaults: defensive, but should not break normal installs.
  return {
    maxArchiveBytes: clampLimit(limits?.maxArchiveBytes) ?? DEFAULT_MAX_ARCHIVE_BYTES_ZIP,
    maxEntries: clampLimit(limits?.maxEntries) ?? DEFAULT_MAX_ENTRIES,
    maxExtractedBytes: clampLimit(limits?.maxExtractedBytes) ?? DEFAULT_MAX_EXTRACTED_BYTES,
    maxEntryBytes: clampLimit(limits?.maxEntryBytes) ?? DEFAULT_MAX_ENTRY_BYTES,
  };
}
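
// Illustrative consequence: zero, negative, NaN, and Infinity all count as
// "unset" and fall back to the default, so limits cannot be disabled here.
//   resolveExtractLimits({ maxEntries: 0 }).maxEntries // => DEFAULT_MAX_ENTRIES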

function assertArchiveEntryCountWithinLimit(
  entryCount: number,
  limits: ResolvedArchiveExtractLimits,
) {
  if (entryCount > limits.maxEntries) {
    throw new Error(ERROR_ARCHIVE_ENTRY_COUNT_EXCEEDS_LIMIT);
  }
}

/**
 * Track extraction byte budgets: `startEntry` resets the per-entry counter,
 * while the total counter accumulates across the whole archive.
 */
function createByteBudgetTracker(limits: ResolvedArchiveExtractLimits): {
  startEntry: () => void;
  addBytes: (bytes: number) => void;
  addEntrySize: (size: number) => void;
} {
  let entryBytes = 0;
  let extractedBytes = 0;

  const addBytes = (bytes: number) => {
    const b = Math.max(0, Math.floor(bytes));
    if (b === 0) {
      return;
    }
    entryBytes += b;
    if (entryBytes > limits.maxEntryBytes) {
      throw new Error(ERROR_ARCHIVE_ENTRY_EXTRACTED_SIZE_EXCEEDS_LIMIT);
    }
    extractedBytes += b;
    if (extractedBytes > limits.maxExtractedBytes) {
      throw new Error(ERROR_ARCHIVE_EXTRACTED_SIZE_EXCEEDS_LIMIT);
    }
  };

  return {
    startEntry() {
      entryBytes = 0;
    },
    addBytes,
    addEntrySize(size: number) {
      const s = Math.max(0, Math.floor(size));
      if (s > limits.maxEntryBytes) {
        throw new Error(ERROR_ARCHIVE_ENTRY_EXTRACTED_SIZE_EXCEEDS_LIMIT);
      }
      // Note: tar budgets are based on the header-declared size.
      addBytes(s);
    },
  };
}
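
// Illustrative flow with maxEntryBytes = 1 MiB (numbers are hypothetical):
//   budget.startEntry();
//   budget.addBytes(800_000); // ok: entry total is 800 kB
//   budget.addBytes(300_000); // throws: entry total passes 1 MiB
// The archive-wide check trips the same way once the running sum passes
// maxExtractedBytes, even when every individual entry stays under its cap.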

/**
 * Pass-through transform that charges each chunk against the byte budget so
 * an over-limit entry fails mid-stream instead of after it is fully written.
 */
function createExtractBudgetTransform(params: {
  onChunkBytes: (bytes: number) => void;
}): Transform {
  return new Transform({
    transform(chunk, _encoding, callback) {
      try {
        const buf = chunk instanceof Buffer ? chunk : Buffer.from(chunk as Uint8Array);
        params.onChunkBytes(buf.byteLength);
        callback(null, buf);
      } catch (err) {
        callback(err instanceof Error ? err : new Error(String(err)));
      }
    },
  });
}

// Structural subset of JSZip's zip object; `nodeStream` only exists on newer
// Node builds of JSZip, so it is probed at runtime below.
type ZipEntry = {
  name: string;
  dir: boolean;
  unixPermissions?: number;
  nodeStream?: () => NodeJS.ReadableStream;
  async: (type: "nodebuffer") => Promise<Buffer>;
};

async function readZipEntryStream(entry: ZipEntry): Promise<NodeJS.ReadableStream> {
  if (typeof entry.nodeStream === "function") {
    return entry.nodeStream();
  }
  // Old JSZip: fall back to buffering, but still extract via a stream.
  const buf = await entry.async("nodebuffer");
  return Readable.from(buf);
}

async function extractZip(params: {
  archivePath: string;
  destDir: string;
  stripComponents?: number;
  limits?: ArchiveExtractLimits;
}): Promise<void> {
  const limits = resolveExtractLimits(params.limits);
  const stat = await fs.stat(params.archivePath);
  if (stat.size > limits.maxArchiveBytes) {
    throw new Error(ERROR_ARCHIVE_SIZE_EXCEEDS_LIMIT);
  }

  const buffer = await fs.readFile(params.archivePath);
  const zip = await JSZip.loadAsync(buffer);
  const entries = Object.values(zip.files) as ZipEntry[];
  const strip = Math.max(0, Math.floor(params.stripComponents ?? 0));

  assertArchiveEntryCountWithinLimit(entries.length, limits);

  const budget = createByteBudgetTracker(limits);

  for (const entry of entries) {
    validateArchiveEntryPath(entry.name);

    const relPath = stripArchivePath(entry.name, strip);
    if (!relPath) {
      continue;
    }
    validateArchiveEntryPath(relPath);

    const outPath = resolveCheckedOutPath(params.destDir, relPath, entry.name);
    if (entry.dir) {
      await fs.mkdir(outPath, { recursive: true });
      continue;
    }

    await fs.mkdir(path.dirname(outPath), { recursive: true });
    budget.startEntry();
    const readable = await readZipEntryStream(entry);

    try {
      await pipeline(
        readable,
        createExtractBudgetTransform({ onChunkBytes: budget.addBytes }),
        createWriteStream(outPath),
      );
    } catch (err) {
      // Remove the partially written file before re-throwing.
      await fs.unlink(outPath).catch(() => undefined);
      throw err;
    }

    // Best-effort permission restore for zip entries created on unix.
    if (typeof entry.unixPermissions === "number") {
      const mode = entry.unixPermissions & 0o777;
      if (mode !== 0) {
        await fs.chmod(outPath, mode).catch(() => undefined);
      }
    }
  }
}

type TarEntryInfo = { path: string; type: string; size: number };

/** Defensively read path/type/size off a tar entry without trusting its shape. */
function readTarEntryInfo(entry: unknown): TarEntryInfo {
  const p =
    typeof entry === "object" && entry !== null && "path" in entry
      ? String((entry as { path: unknown }).path)
      : "";
  const t =
    typeof entry === "object" && entry !== null && "type" in entry
      ? String((entry as { type: unknown }).type)
      : "";
  const s =
    typeof entry === "object" &&
    entry !== null &&
    "size" in entry &&
    typeof (entry as { size?: unknown }).size === "number" &&
    Number.isFinite((entry as { size: number }).size)
      ? Math.max(0, Math.floor((entry as { size: number }).size))
      : 0;
  return { path: p, type: t, size: s };
}

export async function extractArchive(params: {
  archivePath: string;
  destDir: string;
  timeoutMs: number;
  kind?: ArchiveKind;
  stripComponents?: number;
  tarGzip?: boolean;
  limits?: ArchiveExtractLimits;
  logger?: ArchiveLogger;
}): Promise<void> {
  const kind = params.kind ?? resolveArchiveKind(params.archivePath);
  if (!kind) {
    throw new Error(`unsupported archive: ${params.archivePath}`);
  }

  const label = kind === "zip" ? "extract zip" : "extract tar";
  if (kind === "tar") {
    const strip = Math.max(0, Math.floor(params.stripComponents ?? 0));
    const limits = resolveExtractLimits(params.limits);
    let entryCount = 0;
    const budget = createByteBudgetTracker(limits);
    await withTimeout(
      tar.x({
        file: params.archivePath,
        cwd: params.destDir,
        strip,
        gzip: params.tarGzip,
        preservePaths: false,
        strict: true,
        onReadEntry(entry) {
          const info = readTarEntryInfo(entry);

          try {
            validateArchiveEntryPath(info.path);

            const relPath = stripArchivePath(info.path, strip);
            if (!relPath) {
              return;
            }
            validateArchiveEntryPath(relPath);
            resolveCheckedOutPath(params.destDir, relPath, info.path);

            if (
              info.type === "SymbolicLink" ||
              info.type === "Link" ||
              info.type === "BlockDevice" ||
              info.type === "CharacterDevice" ||
              info.type === "FIFO" ||
              info.type === "Socket"
            ) {
              throw new Error(`tar entry is a link or special file: ${info.path}`);
            }

            entryCount += 1;
            assertArchiveEntryCountWithinLimit(entryCount, limits);
            budget.addEntrySize(info.size);
          } catch (err) {
            const error = err instanceof Error ? err : new Error(String(err));
            // Node's EventEmitter calls listeners with `this` bound to the
            // emitter (tar.Unpack), which exposes Parser.abort().
            const emitter = this as unknown as { abort?: (error: Error) => void };
            emitter.abort?.(error);
          }
        },
      }),
      params.timeoutMs,
      label,
    );
    return;
  }

  await withTimeout(
    extractZip({
      archivePath: params.archivePath,
      destDir: params.destDir,
      stripComponents: params.stripComponents,
      limits: params.limits,
    }),
    params.timeoutMs,
    label,
  );
}
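
// Illustrative usage (paths are hypothetical):
//   await extractArchive({
//     archivePath: "/tmp/pkg-1.0.0.tgz",
//     destDir: "/tmp/extract",
//     timeoutMs: 30_000,
//     stripComponents: 1, // drop the leading "package/" directory
//   });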

export async function fileExists(filePath: string): Promise<boolean> {
  try {
    await fs.stat(filePath);
    return true;
  } catch {
    return false;
  }
}

export async function readJsonFile<T>(filePath: string): Promise<T> {
  const raw = await fs.readFile(filePath, "utf-8");
  return JSON.parse(raw) as T;
}