feat: add CRL health status schema and UI (PR 3 of 6)
All checks were successful
CI Pipeline / Rust Format Check (push) Successful in 5s
CI Pipeline / Clippy Lints (push) Successful in 52s
CI Pipeline / Rust Unit Tests (push) Successful in 1m8s
CI Pipeline / Security Audit (push) Successful in 5s
CI Pipeline / Frontend Lint & Type Check (push) Successful in 15s
CI Pipeline / Build .deb & Release (push) Has been skipped
All checks were successful
CI Pipeline / Rust Format Check (push) Successful in 5s
CI Pipeline / Clippy Lints (push) Successful in 52s
CI Pipeline / Rust Unit Tests (push) Successful in 1m8s
CI Pipeline / Security Audit (push) Successful in 5s
CI Pipeline / Frontend Lint & Type Check (push) Successful in 15s
CI Pipeline / Build .deb & Release (push) Has been skipped
* feat: add CRL health status schema and UI (PR 3 of 6)

* fix(lint): strict equality for crl_age_seconds

---------

Co-authored-by: Draco Lunaris <331325+Draco-Lunaris@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
5aec9e629c
commit
ea8337b944
10
crates/pm-agent-client/src/types.rs
Executable file → Normal file
10
crates/pm-agent-client/src/types.rs
Executable file → Normal file
@ -57,6 +57,16 @@ pub struct HealthData {
|
||||
pub uptime_seconds: u64,
|
||||
/// Agent software version string.
|
||||
pub version: String,
|
||||
/// CRL status reported by the agent: `"valid"`, `"expired"`, `"missing"`, `"invalid"`.
|
||||
/// Absent for older agents that do not report CRL status.
|
||||
#[serde(default)]
|
||||
pub crl_status: Option<String>,
|
||||
/// Seconds since the agent's CRL was last refreshed.
|
||||
#[serde(default)]
|
||||
pub crl_age_seconds: Option<i64>,
|
||||
/// When the agent's CRL expires / next update is due (ISO-8601 string in RFC 3339 form).
|
||||
#[serde(default)]
|
||||
pub crl_next_update: Option<String>,
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
|
||||
@ -94,6 +94,15 @@ pub struct Host {
|
||||
pub notes: String,
|
||||
pub registered_at: DateTime<Utc>,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
/// CRL status reported by the agent: `valid`, `expired`, `missing`, or `invalid`; `None` (SQL NULL) for older agents that do not report it.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub crl_status: Option<String>,
|
||||
/// Seconds since the agent's CRL was last refreshed.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub crl_age_seconds: Option<i64>,
|
||||
/// When the agent's CRL expires / next update is due.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub crl_next_update: Option<DateTime<Utc>>,
|
||||
}
|
||||
|
||||
/// Payload for registering a new host.
|
||||
@ -129,6 +138,9 @@ pub struct HostSummary {
|
||||
pub patches_missing: i32,
|
||||
pub health_check_status: Option<String>,
|
||||
pub registered_at: DateTime<Utc>,
|
||||
/// CRL status reported by the agent: `valid`, `expired`, `missing`, or `invalid`; `None` (SQL NULL) for older agents that do not report it.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub crl_status: Option<String>,
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
|
||||
5
crates/pm-web/src/routes/hosts.rs
Executable file → Normal file
5
crates/pm-web/src/routes/hosts.rs
Executable file → Normal file
@ -133,6 +133,7 @@ async fn list_hosts(
|
||||
ELSE 'all_healthy'
|
||||
END AS health_check_status,
|
||||
h.registered_at
|
||||
h.crl_status
|
||||
FROM hosts h
|
||||
LEFT JOIN host_patch_data hpd ON hpd.host_id = h.id
|
||||
ORDER BY h.fqdn
|
||||
@ -166,6 +167,7 @@ async fn list_hosts(
|
||||
ELSE 'all_healthy'
|
||||
END AS health_check_status,
|
||||
h.registered_at
|
||||
h.crl_status
|
||||
FROM hosts h
|
||||
LEFT JOIN host_patch_data hpd ON hpd.host_id = h.id
|
||||
WHERE
|
||||
@ -319,7 +321,8 @@ async fn get_host(
|
||||
SELECT id, fqdn, host(ip_address)::text AS ip_address, display_name,
|
||||
os_family, os_name, arch, agent_version, health_status,
|
||||
last_health_at, last_patch_at, agent_port, notes,
|
||||
registered_at, updated_at
|
||||
registered_at, updated_at,
|
||||
crl_status, crl_age_seconds, crl_next_update
|
||||
FROM hosts WHERE id = $1
|
||||
) h
|
||||
"#,
|
||||
|
||||
43
crates/pm-web/src/routes/status.rs
Executable file → Normal file
43
crates/pm-web/src/routes/status.rs
Executable file → Normal file
@ -24,6 +24,16 @@ pub struct FleetStatus {
|
||||
pub total_pending_patches: i64,
|
||||
pub hosts_requiring_reboot: i64,
|
||||
pub compliance_pct: f64,
|
||||
/// Hosts with CRL status 'valid'.
|
||||
pub crl_valid: i64,
|
||||
/// Hosts with CRL status 'expired'.
|
||||
pub crl_expired: i64,
|
||||
/// Hosts with CRL status 'missing' (agent reports missing CRL).
|
||||
pub crl_missing: i64,
|
||||
/// Hosts with CRL status 'invalid' (security event — needs immediate attention).
|
||||
pub crl_invalid: i64,
|
||||
/// Hosts not reporting CRL status (older agents or no data yet).
|
||||
pub crl_not_reporting: i64,
|
||||
}
|
||||
|
||||
// ── GET /api/v1/status/fleet ──────────────────────────────────────────────────
|
||||
@ -132,6 +142,34 @@ pub async fn fleet_status(
|
||||
// Round to one decimal place.
|
||||
let compliance_pct = (compliance_pct * 10.0).round() / 10.0;
|
||||
|
||||
// ── 5. CRL status counts ────────────────────────────────────────────────
|
||||
let (crl_valid, crl_expired, crl_missing, crl_invalid, crl_not_reporting): (
|
||||
i64,
|
||||
i64,
|
||||
i64,
|
||||
i64,
|
||||
i64,
|
||||
) = sqlx::query_as(
|
||||
r#"
|
||||
SELECT
|
||||
COALESCE(SUM(CASE WHEN crl_status = 'valid' THEN 1 END), 0),
|
||||
COALESCE(SUM(CASE WHEN crl_status = 'expired' THEN 1 END), 0),
|
||||
COALESCE(SUM(CASE WHEN crl_status = 'missing' THEN 1 END), 0),
|
||||
COALESCE(SUM(CASE WHEN crl_status = 'invalid' THEN 1 END), 0),
|
||||
COALESCE(SUM(CASE WHEN crl_status IS NULL THEN 1 END), 0)
|
||||
FROM hosts
|
||||
"#,
|
||||
)
|
||||
.fetch_one(&state.db)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!(error = %e, "fleet_status: failed to query CRL status counts");
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(json!({ "error": { "code": "internal_error", "message": "Database error" } })),
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok(Json(FleetStatus {
|
||||
total_hosts,
|
||||
healthy,
|
||||
@ -141,5 +179,10 @@ pub async fn fleet_status(
|
||||
total_pending_patches,
|
||||
hosts_requiring_reboot,
|
||||
compliance_pct,
|
||||
crl_valid,
|
||||
crl_expired,
|
||||
crl_missing,
|
||||
crl_invalid,
|
||||
crl_not_reporting,
|
||||
}))
|
||||
}
|
||||
|
||||
@ -15,13 +15,13 @@ pm-agent-client = { path = "../pm-agent-client" }
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
sqlx = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
anyhow = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
tracing-subscriber = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
rustls = { workspace = true }
|
||||
tokio-rustls = { version = "0.26" }
|
||||
|
||||
@ -116,8 +116,9 @@ pub async fn run_health_poller(pool: PgPool, config: Arc<AppConfig>) {
|
||||
|
||||
/// Poll a single host, persist the result, and return the determined status.
|
||||
///
|
||||
/// Also updates `agent_version` from the health response and
|
||||
/// `os_family`/`os_name`/`arch` from the `/system/info` endpoint when available.
|
||||
/// Also updates `agent_version` from the health response,
|
||||
/// `os_family`/`os_name`/`arch` from the `/system/info` endpoint when available,
|
||||
/// and CRL status fields from the health response when reported by the agent.
|
||||
async fn poll_host_health(
|
||||
pool: PgPool,
|
||||
host: HostRow,
|
||||
@ -125,79 +126,107 @@ async fn poll_host_health(
|
||||
client_key: &[u8],
|
||||
ca_cert: &[u8],
|
||||
) -> HostHealthStatus {
|
||||
// Determine status, payload, agent version, and optional system info.
|
||||
let (status, payload, agent_version, sys_info) = match AgentClient::new(
|
||||
&host.ip_address,
|
||||
host.agent_port as u16,
|
||||
client_cert,
|
||||
client_key,
|
||||
ca_cert,
|
||||
) {
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
host_id = %host.id,
|
||||
error = %e,
|
||||
"Health poller: failed to build AgentClient"
|
||||
);
|
||||
(
|
||||
HostHealthStatus::Unreachable,
|
||||
serde_json::Value::Object(Default::default()),
|
||||
None,
|
||||
None,
|
||||
)
|
||||
},
|
||||
Ok(client) => {
|
||||
let (status, payload, version) = match client.health().await {
|
||||
Ok(data) => {
|
||||
let payload = serde_json::to_value(&data).unwrap_or_default();
|
||||
(HostHealthStatus::Healthy, payload, Some(data.version))
|
||||
},
|
||||
Err(AgentClientError::Timeout) => {
|
||||
tracing::warn!(host_id = %host.id, "Health poller: agent timed out");
|
||||
(
|
||||
HostHealthStatus::Unreachable,
|
||||
serde_json::Value::Object(Default::default()),
|
||||
None,
|
||||
)
|
||||
},
|
||||
Err(AgentClientError::Connect(_)) => {
|
||||
tracing::warn!(host_id = %host.id, "Health poller: agent connection refused");
|
||||
(
|
||||
HostHealthStatus::Unreachable,
|
||||
serde_json::Value::Object(Default::default()),
|
||||
None,
|
||||
)
|
||||
},
|
||||
Err(e) => {
|
||||
tracing::warn!(host_id = %host.id, error = %e, "Health poller: agent error");
|
||||
(
|
||||
HostHealthStatus::Degraded,
|
||||
serde_json::Value::Object(Default::default()),
|
||||
None,
|
||||
)
|
||||
},
|
||||
};
|
||||
|
||||
// Try to fetch system info for OS/arch details (best-effort).
|
||||
let sys_info = if status != HostHealthStatus::Unreachable {
|
||||
match client.system_info().await {
|
||||
Ok(info) => Some(info),
|
||||
Err(e) => {
|
||||
tracing::debug!(
|
||||
host_id = %host.id,
|
||||
error = %e,
|
||||
"Health poller: failed to get system info (non-fatal)"
|
||||
);
|
||||
None
|
||||
// Determine status, payload, agent version, optional system info, and CRL fields.
|
||||
let (status, payload, agent_version, sys_info, crl_status, crl_age_seconds, crl_next_update) =
|
||||
match AgentClient::new(
|
||||
&host.ip_address,
|
||||
host.agent_port as u16,
|
||||
client_cert,
|
||||
client_key,
|
||||
ca_cert,
|
||||
) {
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
host_id = %host.id,
|
||||
error = %e,
|
||||
"Health poller: failed to build AgentClient"
|
||||
);
|
||||
(
|
||||
HostHealthStatus::Unreachable,
|
||||
serde_json::Value::Object(Default::default()),
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
)
|
||||
},
|
||||
Ok(client) => {
|
||||
let (status, payload, version, crl_status, crl_age, crl_next) = match client
|
||||
.health()
|
||||
.await
|
||||
{
|
||||
Ok(data) => {
|
||||
let payload = serde_json::to_value(&data).unwrap_or_default();
|
||||
let crl_status = data.crl_status.clone();
|
||||
let crl_age = data.crl_age_seconds;
|
||||
let crl_next = data.crl_next_update.clone();
|
||||
(
|
||||
HostHealthStatus::Healthy,
|
||||
payload,
|
||||
Some(data.version),
|
||||
crl_status,
|
||||
crl_age,
|
||||
crl_next,
|
||||
)
|
||||
},
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
Err(AgentClientError::Timeout) => {
|
||||
tracing::warn!(host_id = %host.id, "Health poller: agent timed out");
|
||||
(
|
||||
HostHealthStatus::Unreachable,
|
||||
serde_json::Value::Object(Default::default()),
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
)
|
||||
},
|
||||
Err(AgentClientError::Connect(_)) => {
|
||||
tracing::warn!(host_id = %host.id, "Health poller: agent connection refused");
|
||||
(
|
||||
HostHealthStatus::Unreachable,
|
||||
serde_json::Value::Object(Default::default()),
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
)
|
||||
},
|
||||
Err(e) => {
|
||||
tracing::warn!(host_id = %host.id, error = %e, "Health poller: agent error");
|
||||
(
|
||||
HostHealthStatus::Degraded,
|
||||
serde_json::Value::Object(Default::default()),
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
)
|
||||
},
|
||||
};
|
||||
|
||||
(status, payload, version, sys_info)
|
||||
},
|
||||
};
|
||||
// Try to fetch system info for OS/arch details (best-effort).
|
||||
let sys_info = if status != HostHealthStatus::Unreachable {
|
||||
match client.system_info().await {
|
||||
Ok(info) => Some(info),
|
||||
Err(e) => {
|
||||
tracing::debug!(
|
||||
host_id = %host.id,
|
||||
error = %e,
|
||||
"Health poller: failed to get system info (non-fatal)"
|
||||
);
|
||||
None
|
||||
},
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
(
|
||||
status, payload, version, sys_info, crl_status, crl_age, crl_next,
|
||||
)
|
||||
},
|
||||
};
|
||||
|
||||
// Insert into host_health_data.
|
||||
if let Err(e) = sqlx::query(
|
||||
@ -220,7 +249,13 @@ async fn poll_host_health(
|
||||
.as_ref()
|
||||
.map(|i| format!("{} {}", i.os, i.os_version));
|
||||
|
||||
// Update hosts table with health status, agent version, and OS details.
|
||||
// Parse CRL next_update from an RFC 3339 string into a UTC DateTime; an absent or unparseable value becomes None.
|
||||
let crl_next_update_dt: Option<chrono::DateTime<chrono::Utc>> = crl_next_update
|
||||
.as_ref()
|
||||
.and_then(|s| chrono::DateTime::parse_from_rfc3339(s).ok())
|
||||
.map(|dt| dt.to_utc());
|
||||
|
||||
// Update hosts table with health status, agent version, OS details, and CRL fields.
|
||||
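// COALESCE keeps the previously stored value whenever the new value is NULL (for example, the agent was unreachable or did not report the field), so these columns may hold data from an earlier poll.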
|
||||
if let Err(e) = sqlx::query(
|
||||
r#"
|
||||
@ -229,7 +264,10 @@ async fn poll_host_health(
|
||||
agent_version = COALESCE($3, agent_version),
|
||||
os_family = COALESCE($4, os_family),
|
||||
os_name = COALESCE($5, os_name),
|
||||
arch = COALESCE($6, arch)
|
||||
arch = COALESCE($6, arch),
|
||||
crl_status = COALESCE($7, crl_status),
|
||||
crl_age_seconds = COALESCE($8, crl_age_seconds),
|
||||
crl_next_update = COALESCE($9, crl_next_update)
|
||||
WHERE id = $1
|
||||
"#,
|
||||
)
|
||||
@ -239,6 +277,9 @@ async fn poll_host_health(
|
||||
.bind(sys_info.as_ref().map(|i| i.os.as_str()))
|
||||
.bind(os_name_from_sysinfo)
|
||||
.bind(sys_info.as_ref().map(|i| i.architecture.as_str()))
|
||||
.bind(&crl_status)
|
||||
.bind(crl_age_seconds)
|
||||
.bind(crl_next_update_dt)
|
||||
.execute(&pool)
|
||||
.await
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user