Private
Public Access
1
0

M5: Patch Deployment & Job Management

Backend:
- migrations/003_jobs_scheduling.sql: retry_next_at/last_error columns,
  pg_notify trigger for immediate job dispatch, retry index
- pm-agent-client: ApplyPatchesRequest/Response, AgentJobStatus,
  RollbackResponse types; apply_patches/job_status/rollback_job
  client methods + generic POST helper
- pm-core/models: JobStatus, JobKind, PatchJob, PatchJobHost,
  CreateJobRequest, PatchJobSummary
- pm-web/routes/jobs.rs: POST/GET /api/v1/jobs, GET /jobs/:id,
  POST /jobs/:id/cancel, POST /jobs/:id/rollback
- pm-worker/job_executor.rs: NOTIFY listener, periodic scanner,
  execute_host_job, poll_running_jobs, handle_host_failure (3-retry
  exponential backoff 1m/5m/30m), sync_job_status, retry_pending_jobs
- pm-worker/main.rs: spawn job_executor

Frontend:
- types/index.ts: PatchInfo, PatchJobHost, PatchJob, PatchJobSummary,
  CreateJobRequest interfaces
- api/client.ts: jobsApi (list/get/create/cancel/rollback),
  patchesApi (getHostPatches)
- pages/PatchDeploymentPage.tsx: 3-step MUI Stepper
  (host select → configure → result)
- pages/JobsPage.tsx: job list table, expandable per-host detail,
  cancel/rollback actions with confirm dialog, load-more pagination
- App.tsx: /jobs and /deployment routes wired to real pages

cargo check: 0 errors | vite build: 0 errors
This commit is contained in:
2026-04-23 17:08:43 +00:00
parent a6eb762962
commit 6f9c6dc881
30 changed files with 8465 additions and 44 deletions

View File

@ -0,0 +1,151 @@
//! Fleet status routes.
//!
//! GET /api/v1/status/fleet — aggregate health and patch summary across all hosts.
use axum::{
extract::State,
http::StatusCode,
response::Json,
routing::get,
Router,
};
use serde::Serialize;
use serde_json::{json, Value};
use crate::AppState;
/// Builds the router for the fleet status endpoints.
///
/// Registers a single route, `GET /fleet`, served by [`fleet_status`]. The
/// prefix this router is nested under is decided by the caller (the module
/// docs suggest `/api/v1/status` — confirm at the mount site).
pub fn router() -> Router<AppState> {
    let fleet_handler = get(fleet_status);
    let routes = Router::new();
    routes.route("/fleet", fleet_handler)
}
// ── Response type ─────────────────────────────────────────────────────────────
/// Aggregate health and patch summary for the whole fleet, serialized as the
/// JSON body of the fleet status endpoint.
#[derive(Debug, Serialize)]
pub struct FleetStatus {
/// Number of rows in the `hosts` table.
pub total_hosts: i64,
/// Hosts whose `health_status` is `'healthy'`.
pub healthy: i64,
/// Hosts whose `health_status` is `'degraded'`.
pub degraded: i64,
/// Hosts whose `health_status` is `'unreachable'`.
pub unreachable: i64,
/// Hosts whose `health_status` is `'pending'`.
pub pending: i64,
/// Sum of `patch_count` over the most recently polled patch row of each host.
pub total_pending_patches: i64,
/// Hosts whose most recent patch row lists at least one patch with
/// `requires_reboot: true`.
pub hosts_requiring_reboot: i64,
/// Percentage of hosts whose most recent patch row has `patch_count = 0`,
/// relative to `total_hosts`, rounded to one decimal place. Reported as
/// `100.0` when there are no hosts.
pub compliance_pct: f64,
}
// ── GET /api/v1/status/fleet ──────────────────────────────────────────────────
pub async fn fleet_status(
State(state): State<AppState>,
) -> Result<Json<FleetStatus>, (StatusCode, Json<Value>)> {
// ── 1. Host health aggregates ─────────────────────────────────────────
let health_row: (i64, i64, i64, i64, i64) = sqlx::query_as(
r#"
SELECT
COUNT(*) AS total_hosts,
COUNT(*) FILTER (WHERE health_status = 'healthy') AS healthy,
COUNT(*) FILTER (WHERE health_status = 'degraded') AS degraded,
COUNT(*) FILTER (WHERE health_status = 'unreachable') AS unreachable,
COUNT(*) FILTER (WHERE health_status = 'pending') AS pending
FROM hosts
"#,
)
.fetch_one(&state.db)
.await
.map_err(|e| {
tracing::error!(error = %e, "fleet_status: failed to query host health aggregates");
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(json!({ "error": { "code": "internal_error", "message": "Database error" } })),
)
})?;
let (total_hosts, healthy, degraded, unreachable, pending) = health_row;
// ── 2. Total pending patches across fleet (latest row per host) ───────
let total_pending_patches: i64 = sqlx::query_scalar(
r#"
SELECT COALESCE(SUM(patch_count), 0)
FROM (
SELECT DISTINCT ON (host_id) patch_count
FROM host_patch_data
ORDER BY host_id, polled_at DESC
) latest
"#,
)
.fetch_one(&state.db)
.await
.map_err(|e| {
tracing::error!(error = %e, "fleet_status: failed to query total pending patches");
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(json!({ "error": { "code": "internal_error", "message": "Database error" } })),
)
})?;
// ── 3. Hosts requiring a reboot (latest patch row per host) ───────────
let hosts_requiring_reboot: i64 = sqlx::query_scalar(
r#"
SELECT COUNT(*)
FROM (
SELECT DISTINCT ON (host_id) available_patches
FROM host_patch_data
ORDER BY host_id, polled_at DESC
) latest
WHERE available_patches @> '[{"requires_reboot": true}]'
"#,
)
.fetch_one(&state.db)
.await
.map_err(|e| {
tracing::error!(error = %e, "fleet_status: failed to query reboot-required hosts");
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(json!({ "error": { "code": "internal_error", "message": "Database error" } })),
)
})?;
// ── 4. Compliance: hosts with zero pending patches / total hosts ───────
// Hosts that have been polled and have patch_count == 0 are considered
// compliant. Hosts with no patch data at all are excluded from the
// compliance calculation.
let compliant_hosts: i64 = sqlx::query_scalar(
r#"
SELECT COUNT(*)
FROM (
SELECT DISTINCT ON (host_id) patch_count
FROM host_patch_data
ORDER BY host_id, polled_at DESC
) latest
WHERE patch_count = 0
"#,
)
.fetch_one(&state.db)
.await
.map_err(|e| {
tracing::error!(error = %e, "fleet_status: failed to query compliant hosts");
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(json!({ "error": { "code": "internal_error", "message": "Database error" } })),
)
})?;
let compliance_pct = if total_hosts == 0 {
100.0_f64
} else {
(compliant_hosts as f64 / total_hosts as f64) * 100.0
};
// Round to one decimal place.
let compliance_pct = (compliance_pct * 10.0).round() / 10.0;
Ok(Json(FleetStatus {
total_hosts,
healthy,
degraded,
unreachable,
pending,
total_pending_patches,
hosts_requiring_reboot,
compliance_pct,
}))
}