feat(repositories): healthchecks and doctor command

This commit is contained in:
Nicolas Meienberger
2025-11-08 10:11:07 +01:00
parent f5339d3708
commit 4dc239139f
14 changed files with 554 additions and 61 deletions

View File

@@ -0,0 +1,26 @@
import { Job } from "../core/scheduler";
import { repositoriesService } from "../modules/repositories/repositories.service";
import { logger } from "../utils/logger";
import { db } from "../db/db";
import { eq, or } from "drizzle-orm";
import { repositoriesTable } from "../db/schema";
/**
 * Scheduled job that re-runs the health check for repositories.
 *
 * Only repositories whose status is currently "healthy" or "error" are selected.
 * They are checked one after another through `repositoriesService.checkHealth`;
 * a failure on one repository is logged and does not stop the remaining checks.
 */
export class RepositoryHealthCheckJob extends Job {
  async run() {
    logger.debug("Running health check for all repositories...");

    const statusFilter = or(eq(repositoriesTable.status, "healthy"), eq(repositoriesTable.status, "error"));
    const candidates = await db.query.repositoriesTable.findMany({ where: statusFilter });

    for (const candidate of candidates) {
      try {
        await repositoriesService.checkHealth(candidate.id);
      } catch (err) {
        // Keep going: one broken repository must not block the others.
        logger.error(`Health check failed for repository ${candidate.name}:`, err);
      }
    }

    return { done: true, timestamp: new Date() };
  }
}

View File

@@ -7,6 +7,7 @@ import { restic } from "../../utils/restic";
import { volumeService } from "../volumes/volume.service";
import { CleanupDanglingMountsJob } from "../../jobs/cleanup-dangling";
import { VolumeHealthCheckJob } from "../../jobs/healthchecks";
import { RepositoryHealthCheckJob } from "../../jobs/repository-healthchecks";
import { BackupExecutionJob } from "../../jobs/backup-execution";
import { CleanupSessionsJob } from "../../jobs/cleanup-sessions";
@@ -32,6 +33,7 @@ export const startup = async () => {
Scheduler.build(CleanupDanglingMountsJob).schedule("0 * * * *");
Scheduler.build(VolumeHealthCheckJob).schedule("*/5 * * * *");
Scheduler.build(RepositoryHealthCheckJob).schedule("*/10 * * * *");
Scheduler.build(BackupExecutionJob).schedule("* * * * *");
Scheduler.build(CleanupSessionsJob).schedule("0 0 * * *");
};

View File

@@ -4,6 +4,7 @@ import {
createRepositoryBody,
createRepositoryDto,
deleteRepositoryDto,
doctorRepositoryDto,
getRepositoryDto,
getSnapshotDetailsDto,
listRepositoriesDto,
@@ -14,6 +15,7 @@ import {
restoreSnapshotBody,
restoreSnapshotDto,
type DeleteRepositoryDto,
type DoctorRepositoryDto,
type GetRepositoryDto,
type GetSnapshotDetailsDto,
type ListRepositoriesDto,
@@ -71,6 +73,8 @@ export const repositoriesController = new Hono()
};
});
c.header("Cache-Control", "public, max-age=10, stale-while-revalidate=60");
return c.json<ListSnapshotsDto>(snapshots, 200);
})
.get("/:name/snapshots/:snapshotId", getSnapshotDetailsDto, async (c) => {
@@ -116,4 +120,11 @@ export const repositoriesController = new Hono()
const result = await repositoriesService.restoreSnapshot(name, snapshotId, options);
return c.json<RestoreSnapshotDto>(result, 200);
})
.post("/:name/doctor", doctorRepositoryDto, async (c) => {
const { name } = c.req.param();
const result = await repositoriesService.doctorRepository(name);
return c.json<DoctorRepositoryDto>(result, 200);
});

View File

@@ -271,3 +271,38 @@ export const restoreSnapshotDto = describeRoute({
},
},
});
/**
* Doctor a repository (unlock, check, repair)
*/
/**
 * Shape of a single recorded doctor step.
 *
 * `output` and `error` may be omitted, a string, or `null`: the repositories
 * service reports "no value" as `null`, so the schema accepts it in addition to
 * the previously allowed omitted/string forms.
 */
export const doctorStepSchema = type({
  step: "string",
  success: "boolean",
  "output?": "string | null",
  "error?": "string | null",
});
/**
 * Response body of the doctor endpoint: an overall success flag, a summary
 * message, and the list of recorded steps.
 */
export const doctorRepositoryResponse = type({
  success: "boolean",
  message: "string",
  steps: doctorStepSchema.array(),
});

/** Static type inferred from `doctorRepositoryResponse`. */
export type DoctorRepositoryDto = typeof doctorRepositoryResponse.infer;
/**
 * Route description for the doctor operation: declares the operation id, tag,
 * and the JSON schema of the 200 response.
 */
export const doctorRepositoryDto = describeRoute({
  description:
    "Run doctor operations on a repository to fix common issues (unlock, check, repair index). Use this when the repository is locked or has errors.",
  tags: ["Repositories"],
  operationId: "doctorRepository",
  responses: {
    200: {
      description: "Doctor operation completed",
      content: {
        "application/json": {
          schema: resolver(doctorRepositoryResponse),
        },
      },
    },
  },
});

View File

@@ -4,7 +4,7 @@ import { eq } from "drizzle-orm";
import { ConflictError, InternalServerError, NotFoundError } from "http-errors-enhanced";
import slugify from "slugify";
import { db } from "../../db/db";
import { repositoriesTable, volumesTable } from "../../db/schema";
import { repositoriesTable } from "../../db/schema";
import { toMessage } from "../../utils/errors";
import { restic } from "../../utils/restic";
import { cryptoUtils } from "../../utils/crypto";
@@ -202,6 +202,112 @@ const getSnapshotDetails = async (name: string, snapshotId: string) => {
return snapshot;
};
/**
 * Checks whether a repository is reachable by listing its snapshots, then
 * stores the outcome on the repository row.
 *
 * @param repositoryId - Id of the repository to check.
 * @returns The resulting status and the error message (or `null` when healthy).
 * @throws NotFoundError when no repository has the given id.
 */
const checkHealth = async (repositoryId: string) => {
  const repository = await db.query.repositoriesTable.findFirst({
    where: eq(repositoriesTable.id, repositoryId),
  });

  if (!repository) {
    throw new NotFoundError("Repository not found");
  }

  let status: "healthy" | "error" = "healthy";
  let lastError: string | null = null;

  try {
    await restic.snapshots(repository.config);
  } catch (cause) {
    // A failing snapshot listing is recorded, not rethrown.
    status = "error";
    lastError = toMessage(cause);
  }

  await db
    .update(repositoriesTable)
    .set({ status, lastChecked: Date.now(), lastError })
    .where(eq(repositoriesTable.id, repository.id));

  return { status, lastError };
};
/**
 * Runs the doctor sequence on a repository: unlock, check, and, only when the
 * check reports problems, repair the index and check again.
 *
 * Every step is recorded with its output or error; a failing step never aborts
 * the sequence. The final verdict is persisted on the repository row
 * (`status`, `lastChecked`, `lastError`).
 *
 * When a repair was attempted, the initial "check" step is superseded by the
 * "recheck" step for the verdict: a repository that was successfully repaired
 * is reported as healthy even though the first check failed.
 *
 * @param name - Name of the repository to doctor.
 * @returns Overall success flag, a summary message, and the recorded steps.
 * @throws NotFoundError when no repository has the given name.
 */
const doctorRepository = async (name: string) => {
  const repository = await db.query.repositoriesTable.findFirst({
    where: eq(repositoriesTable.name, name),
  });

  if (!repository) {
    throw new NotFoundError("Repository not found");
  }

  const steps: Array<{ step: string; success: boolean; output: string | null; error: string | null }> = [];

  const unlockResult = await restic.unlock(repository.config).then(
    (result) => ({ success: true, message: result.message, error: null }),
    (error) => ({ success: false, message: null, error: toMessage(error) }),
  );
  steps.push({
    step: "unlock",
    success: unlockResult.success,
    output: unlockResult.message,
    error: unlockResult.error,
  });

  // Shared by the initial check and the recheck; a rejected call is turned into a failed result.
  const runCheck = () =>
    restic.check(repository.config, { readData: false }).then(
      (result) => result,
      (error) => ({ success: false, output: null, error: toMessage(error), hasErrors: true }),
    );

  const checkResult = await runCheck();
  steps.push({
    step: "check",
    success: checkResult.success,
    output: checkResult.output,
    error: checkResult.error,
  });

  const repairAttempted = Boolean(checkResult.hasErrors);

  if (repairAttempted) {
    const repairResult = await restic.repairIndex(repository.config).then(
      (result) => ({ success: true, output: result.output, error: null }),
      (error) => ({ success: false, output: null, error: toMessage(error) }),
    );
    steps.push({
      step: "repair_index",
      success: repairResult.success,
      output: repairResult.output,
      error: repairResult.error,
    });

    const recheckResult = await runCheck();
    steps.push({
      step: "recheck",
      success: recheckResult.success,
      output: recheckResult.output,
      error: recheckResult.error,
    });
  }

  // After a repair, the recheck replaces the initial check as the source of truth;
  // otherwise a repaired repository could never be reported as healthy.
  const decisiveSteps = repairAttempted ? steps.filter((s) => s.step !== "check") : steps;
  const firstFailure = decisiveSteps.find((s) => !s.success);
  const allSuccessful = firstFailure === undefined;

  await db
    .update(repositoriesTable)
    .set({
      status: allSuccessful ? "healthy" : "error",
      lastChecked: Date.now(),
      // Always write an explicit value so a previous error is never left behind.
      lastError: firstFailure?.error ?? null,
    })
    .where(eq(repositoriesTable.id, repository.id));

  return {
    success: allSuccessful,
    message: allSuccessful ? "Repository doctor completed successfully" : "Repository doctor completed with errors",
    steps,
  };
};
export const repositoriesService = {
listRepositories,
createRepository,
@@ -211,4 +317,6 @@ export const repositoriesService = {
listSnapshotFiles,
restoreSnapshot,
getSnapshotDetails,
checkHealth,
doctorRepository,
};

View File

@@ -148,7 +148,7 @@ const backup = async (
args.push("--json");
await $`restic unlock --repo ${repoUrl}`.env(env).nothrow();
// await $`restic unlock --repo ${repoUrl}`.env(env).nothrow();
const res = await $`restic ${args}`.env(env).nothrow();
if (includeFile) {
@@ -334,7 +334,7 @@ const forget = async (config: RepositoryConfig, options: RetentionPolicy, extra:
args.push("--prune");
args.push("--json");
await $`restic unlock --repo ${repoUrl}`.env(env).nothrow();
// await $`restic unlock --repo ${repoUrl}`.env(env).nothrow();
const res = await $`restic ${args}`.env(env).nothrow();
if (res.exitCode !== 0) {
@@ -425,6 +425,79 @@ const ls = async (config: RepositoryConfig, snapshotId: string, path?: string) =
return { snapshot, nodes };
};
/**
 * Runs `restic unlock` against the repository described by `config`.
 *
 * @returns A success object when the command exits with code 0.
 * @throws Error carrying restic's stderr when the command exits non-zero.
 */
const unlock = async (config: RepositoryConfig) => {
  const repoUrl = buildRepoUrl(config);
  const env = await buildEnv(config);

  const result = await $`restic unlock --repo ${repoUrl} --json`.env(env).nothrow();

  if (result.exitCode === 0) {
    logger.info(`Restic unlock succeeded for repository: ${repoUrl}`);
    return { success: true, message: "Repository unlocked successfully" };
  }

  const failure = `Restic unlock failed: ${result.stderr}`;
  logger.error(failure);
  throw new Error(failure);
};
/**
 * Runs `restic check` against the repository described by `config`.
 *
 * Command failures are reported through the return value rather than thrown:
 * a non-zero exit code yields `success: false` with restic's stderr as `error`.
 * On exit code 0 the output is additionally scanned for "error"/"Fatal".
 *
 * @param config - Repository configuration used to build the repo URL and environment.
 * @param options - `readData: true` adds `--read-data` to the command.
 * @returns `success`, `hasErrors`, the captured stdout as `output`, and an `error` message or `null`.
 */
const check = async (config: RepositoryConfig, options?: { readData?: boolean }) => {
  const repoUrl = buildRepoUrl(config);
  const env = await buildEnv(config);

  const args: string[] = ["--repo", repoUrl, "check"];

  if (options?.readData) {
    args.push("--read-data");
  }

  const res = await $`restic ${args}`.env(env).nothrow();

  const stdout = res.text();
  const stderr = res.stderr.toString();

  if (res.exitCode !== 0) {
    logger.error(`Restic check failed: ${stderr}`);
    return {
      success: false,
      hasErrors: true,
      output: stdout,
      error: stderr,
    };
  }

  // restic's success summary is "no errors were found", which itself contains
  // "error"; drop it before scanning so a healthy repository is not flagged.
  const scannedOutput = stdout.replace(/no errors were found/g, "");
  const hasErrors = scannedOutput.includes("error") || scannedOutput.includes("Fatal");

  logger.info(`Restic check completed for repository: ${repoUrl}`);

  return {
    success: !hasErrors,
    hasErrors,
    output: stdout,
    error: hasErrors ? "Repository contains errors" : null,
  };
};
/**
 * Runs `restic repair index` against the repository described by `config`.
 *
 * @returns A success object carrying the command's stdout as `output`.
 * @throws Error carrying restic's stderr when the command exits non-zero.
 */
const repairIndex = async (config: RepositoryConfig) => {
  const repoUrl = buildRepoUrl(config);
  const env = await buildEnv(config);

  const result = await $`restic repair index --repo ${repoUrl}`.env(env).nothrow();
  const output = result.text();
  const errorText = result.stderr.toString();

  if (result.exitCode === 0) {
    logger.info(`Restic repair index completed for repository: ${repoUrl}`);
    return { success: true, output, message: "Index repaired successfully" };
  }

  logger.error(`Restic repair index failed: ${errorText}`);
  throw new Error(`Restic repair index failed: ${errorText}`);
};
export const restic = {
ensurePassfile,
init,
@@ -432,5 +505,8 @@ export const restic = {
restore,
snapshots,
forget,
unlock,
ls,
check,
repairIndex,
};