mirror of
https://github.com/nicotsx/ironmount.git
synced 2025-12-10 12:10:51 +01:00
feat(repositories): healthchecks and doctor command
This commit is contained in:
26
apps/server/src/jobs/repository-healthchecks.ts
Normal file
26
apps/server/src/jobs/repository-healthchecks.ts
Normal file
@@ -0,0 +1,26 @@
|
||||
import { Job } from "../core/scheduler";
|
||||
import { repositoriesService } from "../modules/repositories/repositories.service";
|
||||
import { logger } from "../utils/logger";
|
||||
import { db } from "../db/db";
|
||||
import { eq, or } from "drizzle-orm";
|
||||
import { repositoriesTable } from "../db/schema";
|
||||
|
||||
/**
 * Scheduled job that re-runs the health check of every repository whose
 * stored status is currently "healthy" or "error".
 */
export class RepositoryHealthCheckJob extends Job {
	/**
	 * Checks the matching repositories one after another. A check that throws is
	 * logged and does not prevent the remaining repositories from being checked.
	 *
	 * @returns A completion marker carrying the time at which the run ended.
	 */
	async run() {
		logger.debug("Running health check for all repositories...");

		const isHealthy = eq(repositoriesTable.status, "healthy");
		const isErrored = eq(repositoriesTable.status, "error");
		const candidates = await db.query.repositoriesTable.findMany({
			where: or(isHealthy, isErrored),
		});

		for (const candidate of candidates) {
			try {
				await repositoriesService.checkHealth(candidate.id);
			} catch (cause) {
				logger.error(`Health check failed for repository ${candidate.name}:`, cause);
			}
		}

		return { done: true, timestamp: new Date() };
	}
}
|
||||
@@ -7,6 +7,7 @@ import { restic } from "../../utils/restic";
|
||||
import { volumeService } from "../volumes/volume.service";
|
||||
import { CleanupDanglingMountsJob } from "../../jobs/cleanup-dangling";
|
||||
import { VolumeHealthCheckJob } from "../../jobs/healthchecks";
|
||||
import { RepositoryHealthCheckJob } from "../../jobs/repository-healthchecks";
|
||||
import { BackupExecutionJob } from "../../jobs/backup-execution";
|
||||
import { CleanupSessionsJob } from "../../jobs/cleanup-sessions";
|
||||
|
||||
@@ -32,6 +33,7 @@ export const startup = async () => {
|
||||
|
||||
Scheduler.build(CleanupDanglingMountsJob).schedule("0 * * * *");
|
||||
Scheduler.build(VolumeHealthCheckJob).schedule("*/5 * * * *");
|
||||
Scheduler.build(RepositoryHealthCheckJob).schedule("*/10 * * * *");
|
||||
Scheduler.build(BackupExecutionJob).schedule("* * * * *");
|
||||
Scheduler.build(CleanupSessionsJob).schedule("0 0 * * *");
|
||||
};
|
||||
|
||||
@@ -4,6 +4,7 @@ import {
|
||||
createRepositoryBody,
|
||||
createRepositoryDto,
|
||||
deleteRepositoryDto,
|
||||
doctorRepositoryDto,
|
||||
getRepositoryDto,
|
||||
getSnapshotDetailsDto,
|
||||
listRepositoriesDto,
|
||||
@@ -14,6 +15,7 @@ import {
|
||||
restoreSnapshotBody,
|
||||
restoreSnapshotDto,
|
||||
type DeleteRepositoryDto,
|
||||
type DoctorRepositoryDto,
|
||||
type GetRepositoryDto,
|
||||
type GetSnapshotDetailsDto,
|
||||
type ListRepositoriesDto,
|
||||
@@ -71,6 +73,8 @@ export const repositoriesController = new Hono()
|
||||
};
|
||||
});
|
||||
|
||||
c.header("Cache-Control", "public, max-age=10, stale-while-revalidate=60");
|
||||
|
||||
return c.json<ListSnapshotsDto>(snapshots, 200);
|
||||
})
|
||||
.get("/:name/snapshots/:snapshotId", getSnapshotDetailsDto, async (c) => {
|
||||
@@ -116,4 +120,11 @@ export const repositoriesController = new Hono()
|
||||
const result = await repositoriesService.restoreSnapshot(name, snapshotId, options);
|
||||
|
||||
return c.json<RestoreSnapshotDto>(result, 200);
|
||||
})
|
||||
.post("/:name/doctor", doctorRepositoryDto, async (c) => {
|
||||
const { name } = c.req.param();
|
||||
|
||||
const result = await repositoriesService.doctorRepository(name);
|
||||
|
||||
return c.json<DoctorRepositoryDto>(result, 200);
|
||||
});
|
||||
|
||||
@@ -271,3 +271,38 @@ export const restoreSnapshotDto = describeRoute({
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
/**
 * Doctor a repository (unlock, check, repair)
 */

/** Shape of one entry in the `steps` array of the doctor response. */
export const doctorStepSchema = type({
	// Name of the step that was executed.
	step: "string",
	// Whether this step succeeded.
	success: "boolean",
	// Optional output of the step.
	output: "string?",
	// Optional error message of the step.
	error: "string?",
});
|
||||
|
||||
/** Response body of the doctor endpoint: overall outcome plus one entry per executed step. */
export const doctorRepositoryResponse = type({
	// Overall outcome of the doctor run.
	success: "boolean",
	// Summary message for the run.
	message: "string",
	// Per-step results, each matching `doctorStepSchema`.
	steps: doctorStepSchema.array(),
});

/** Static type inferred from `doctorRepositoryResponse`. */
export type DoctorRepositoryDto = typeof doctorRepositoryResponse.infer;
|
||||
|
||||
/** Route description for the doctor endpoint (operation id `doctorRepository`). */
export const doctorRepositoryDto = describeRoute({
	description:
		"Run doctor operations on a repository to fix common issues (unlock, check, repair index). Use this when the repository is locked or has errors.",
	tags: ["Repositories"],
	operationId: "doctorRepository",
	responses: {
		// Only the 200 response is described; its body follows `doctorRepositoryResponse`.
		200: {
			description: "Doctor operation completed",
			content: {
				"application/json": { schema: resolver(doctorRepositoryResponse) },
			},
		},
	},
});
|
||||
|
||||
@@ -4,7 +4,7 @@ import { eq } from "drizzle-orm";
|
||||
import { ConflictError, InternalServerError, NotFoundError } from "http-errors-enhanced";
|
||||
import slugify from "slugify";
|
||||
import { db } from "../../db/db";
|
||||
import { repositoriesTable, volumesTable } from "../../db/schema";
|
||||
import { repositoriesTable } from "../../db/schema";
|
||||
import { toMessage } from "../../utils/errors";
|
||||
import { restic } from "../../utils/restic";
|
||||
import { cryptoUtils } from "../../utils/crypto";
|
||||
@@ -202,6 +202,112 @@ const getSnapshotDetails = async (name: string, snapshotId: string) => {
|
||||
return snapshot;
|
||||
};
|
||||
|
||||
/**
 * Probes a repository by listing its snapshots through restic and stores the outcome.
 *
 * The repository row is updated with the resulting status ("healthy" when the
 * listing resolves, "error" when it rejects), the current timestamp and the
 * error message (null on success).
 *
 * @param repositoryId - Id of the repository to check.
 * @returns The new status and the associated error message, if any.
 * @throws NotFoundError when no repository has the given id.
 */
const checkHealth = async (repositoryId: string) => {
	const repository = await db.query.repositoriesTable.findFirst({
		where: eq(repositoriesTable.id, repositoryId),
	});

	if (!repository) {
		throw new NotFoundError("Repository not found");
	}

	// A rejected snapshot listing is converted into an "error" outcome instead of propagating.
	const outcome = await restic.snapshots(repository.config).then(
		() => ({ error: null, status: "healthy" as const }),
		(reason) => ({ error: toMessage(reason), status: "error" as const }),
	);

	await db
		.update(repositoriesTable)
		.set({ status: outcome.status, lastChecked: Date.now(), lastError: outcome.error })
		.where(eq(repositoriesTable.id, repository.id));

	return { status: outcome.status, lastError: outcome.error };
};
|
||||
|
||||
/**
 * Runs the "doctor" sequence on a repository: unlock, check and, only when the
 * check reports errors, repair index followed by a second check.
 *
 * Every step is recorded whether it succeeds or fails; a failing step does not
 * abort the sequence. Afterwards the repository row is updated with the overall
 * status, the current timestamp and the error of the first failed step (null
 * when every step succeeded).
 *
 * @param name - Name of the repository to doctor.
 * @returns Overall success flag, a summary message and the per-step results.
 * @throws NotFoundError when no repository has the given name.
 */
const doctorRepository = async (name: string) => {
	const repository = await db.query.repositoriesTable.findFirst({
		where: eq(repositoriesTable.name, name),
	});

	if (!repository) {
		throw new NotFoundError("Repository not found");
	}

	const steps: Array<{ step: string; success: boolean; output: string | null; error: string | null }> = [];

	// Shared by the "check" and "recheck" steps: a rejected check is folded into
	// the same shape as a failed check result so the sequence can continue.
	const runCheck = () =>
		restic.check(repository.config, { readData: false }).then(
			(result) => result,
			(error) => ({ success: false, output: null, error: toMessage(error), hasErrors: true }),
		);

	const unlockResult = await restic.unlock(repository.config).then(
		(result) => ({ success: true, message: result.message, error: null }),
		(error) => ({ success: false, message: null, error: toMessage(error) }),
	);

	steps.push({
		step: "unlock",
		success: unlockResult.success,
		output: unlockResult.message,
		error: unlockResult.error,
	});

	const checkResult = await runCheck();

	steps.push({
		step: "check",
		success: checkResult.success,
		output: checkResult.output,
		error: checkResult.error,
	});

	if (checkResult.hasErrors) {
		const repairResult = await restic.repairIndex(repository.config).then(
			(result) => ({ success: true, output: result.output, error: null }),
			(error) => ({ success: false, output: null, error: toMessage(error) }),
		);

		steps.push({
			step: "repair_index",
			success: repairResult.success,
			output: repairResult.output,
			error: repairResult.error,
		});

		const recheckResult = await runCheck();

		steps.push({
			step: "recheck",
			success: recheckResult.success,
			output: recheckResult.output,
			error: recheckResult.error,
		});
	}

	const firstFailure = steps.find((s) => !s.success);
	const allSuccessful = firstFailure === undefined;

	await db
		.update(repositoriesTable)
		.set({
			status: allSuccessful ? "healthy" : "error",
			lastChecked: Date.now(),
			// Always write an explicit value so a previous error is never left behind.
			lastError: firstFailure?.error ?? null,
		})
		.where(eq(repositoriesTable.id, repository.id));

	return {
		success: allSuccessful,
		// The response schema declares `message` as a required string.
		message: firstFailure
			? `Repository doctor failed at step "${firstFailure.step}"`
			: "Repository doctor completed successfully",
		steps,
	};
};
|
||||
|
||||
export const repositoriesService = {
|
||||
listRepositories,
|
||||
createRepository,
|
||||
@@ -211,4 +317,6 @@ export const repositoriesService = {
|
||||
listSnapshotFiles,
|
||||
restoreSnapshot,
|
||||
getSnapshotDetails,
|
||||
checkHealth,
|
||||
doctorRepository,
|
||||
};
|
||||
|
||||
@@ -148,7 +148,7 @@ const backup = async (
|
||||
|
||||
args.push("--json");
|
||||
|
||||
await $`restic unlock --repo ${repoUrl}`.env(env).nothrow();
|
||||
// await $`restic unlock --repo ${repoUrl}`.env(env).nothrow();
|
||||
const res = await $`restic ${args}`.env(env).nothrow();
|
||||
|
||||
if (includeFile) {
|
||||
@@ -334,7 +334,7 @@ const forget = async (config: RepositoryConfig, options: RetentionPolicy, extra:
|
||||
args.push("--prune");
|
||||
args.push("--json");
|
||||
|
||||
await $`restic unlock --repo ${repoUrl}`.env(env).nothrow();
|
||||
// await $`restic unlock --repo ${repoUrl}`.env(env).nothrow();
|
||||
const res = await $`restic ${args}`.env(env).nothrow();
|
||||
|
||||
if (res.exitCode !== 0) {
|
||||
@@ -425,6 +425,79 @@ const ls = async (config: RepositoryConfig, snapshotId: string, path?: string) =
|
||||
return { snapshot, nodes };
|
||||
};
|
||||
|
||||
/**
 * Runs `restic unlock` against the repository described by `config`.
 *
 * @param config - Repository configuration used to build the repository URL and environment.
 * @returns A success marker with a fixed confirmation message.
 * @throws Error when restic exits with a non-zero code; the message carries restic's stderr.
 */
const unlock = async (config: RepositoryConfig) => {
	const repoUrl = buildRepoUrl(config);
	const env = await buildEnv(config);

	const res = await $`restic unlock --repo ${repoUrl} --json`.env(env).nothrow();

	if (res.exitCode !== 0) {
		const failure = `Restic unlock failed: ${res.stderr}`;
		logger.error(failure);
		throw new Error(failure);
	}

	logger.info(`Restic unlock succeeded for repository: ${repoUrl}`);

	return { success: true, message: "Repository unlocked successfully" };
};
|
||||
|
||||
/**
 * Runs `restic check` against the repository described by `config`.
 *
 * A failed check is reported through the returned object rather than by
 * throwing: a non-zero exit code yields `success: false` with restic's stderr
 * as the error.
 *
 * @param config - Repository configuration used to build the repository URL and environment.
 * @param options - `readData: true` adds `--read-data` to the restic invocation.
 * @returns `success`, `hasErrors`, the captured stdout as `output`, and an
 *   `error` message (null when no problem was detected).
 */
const check = async (config: RepositoryConfig, options?: { readData?: boolean }) => {
	const repoUrl = buildRepoUrl(config);
	const env = await buildEnv(config);

	const args: string[] = ["--repo", repoUrl, "check"];

	if (options?.readData) {
		args.push("--read-data");
	}

	const res = await $`restic ${args}`.env(env).nothrow();

	const stdout = res.text();
	const stderr = res.stderr.toString();

	if (res.exitCode !== 0) {
		logger.error(`Restic check failed: ${stderr}`);
		return {
			success: false,
			hasErrors: true,
			output: stdout,
			error: stderr,
		};
	}

	// restic exited with code 0 here. Its success output contains the phrase
	// "no errors were found", so matching the bare substring "error" would flag
	// every healthy repository. Only an explicit "Fatal" marker is still treated
	// as a failure.
	const hasErrors = stdout.includes("Fatal");

	logger.info(`Restic check completed for repository: ${repoUrl}`);

	return {
		success: !hasErrors,
		hasErrors,
		output: stdout,
		error: hasErrors ? "Repository contains errors" : null,
	};
};
|
||||
|
||||
/**
 * Runs `restic repair index` against the repository described by `config`.
 *
 * @param config - Repository configuration used to build the repository URL and environment.
 * @returns A success marker, the captured stdout as `output` and a fixed confirmation message.
 * @throws Error when restic exits with a non-zero code; the message carries restic's stderr.
 */
const repairIndex = async (config: RepositoryConfig) => {
	const repoUrl = buildRepoUrl(config);
	const env = await buildEnv(config);

	const res = await $`restic repair index --repo ${repoUrl}`.env(env).nothrow();

	const output = res.text();
	const failureDetails = res.stderr.toString();

	if (res.exitCode !== 0) {
		const failure = `Restic repair index failed: ${failureDetails}`;
		logger.error(failure);
		throw new Error(failure);
	}

	logger.info(`Restic repair index completed for repository: ${repoUrl}`);

	return { success: true, output, message: "Index repaired successfully" };
};
|
||||
|
||||
export const restic = {
|
||||
ensurePassfile,
|
||||
init,
|
||||
@@ -432,5 +505,8 @@ export const restic = {
|
||||
restore,
|
||||
snapshots,
|
||||
forget,
|
||||
unlock,
|
||||
ls,
|
||||
check,
|
||||
repairIndex,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user