diff --git a/Cargo.lock b/Cargo.lock old mode 100644 new mode 100755 index 8ef5ae6..6c1e352 --- a/Cargo.lock +++ b/Cargo.lock @@ -2381,7 +2381,7 @@ dependencies = [ [[package]] name = "pm-agent-client" -version = "0.1.7" +version = "0.1.8" dependencies = [ "anyhow", "chrono", @@ -2398,7 +2398,7 @@ dependencies = [ [[package]] name = "pm-auth" -version = "0.1.7" +version = "0.1.8" dependencies = [ "anyhow", "argon2", @@ -2425,7 +2425,7 @@ dependencies = [ [[package]] name = "pm-ca" -version = "0.1.7" +version = "0.1.8" dependencies = [ "anyhow", "chrono", @@ -2448,7 +2448,7 @@ dependencies = [ [[package]] name = "pm-core" -version = "0.1.7" +version = "0.1.8" dependencies = [ "aes-gcm", "anyhow", @@ -2472,7 +2472,7 @@ dependencies = [ [[package]] name = "pm-reports" -version = "0.1.7" +version = "0.1.8" dependencies = [ "anyhow", "chrono", @@ -2492,7 +2492,7 @@ dependencies = [ [[package]] name = "pm-web" -version = "0.1.7" +version = "0.1.8" dependencies = [ "anyhow", "axum", @@ -2530,7 +2530,7 @@ dependencies = [ [[package]] name = "pm-worker" -version = "0.1.7" +version = "0.1.8" dependencies = [ "anyhow", "chrono", diff --git a/Cargo.toml b/Cargo.toml old mode 100644 new mode 100755 index 8e2e891..29e7bc8 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,7 @@ members = [ ] [workspace.package] -version = "0.1.7" +version = "0.1.8" edition = "2021" authors = ["Echo "] license = "MIT" diff --git a/crates/pm-agent-client/src/client.rs b/crates/pm-agent-client/src/client.rs old mode 100644 new mode 100755 diff --git a/crates/pm-agent-client/src/error.rs b/crates/pm-agent-client/src/error.rs old mode 100644 new mode 100755 diff --git a/crates/pm-agent-client/src/lib.rs b/crates/pm-agent-client/src/lib.rs old mode 100644 new mode 100755 diff --git a/crates/pm-agent-client/src/types.rs b/crates/pm-agent-client/src/types.rs old mode 100644 new mode 100755 diff --git a/crates/pm-auth/src/jwt.rs b/crates/pm-auth/src/jwt.rs old mode 100644 new mode 100755 diff --git a/crates/pm-auth/src/lib.rs b/crates/pm-auth/src/lib.rs old mode 100644 new mode 100755 diff --git a/crates/pm-auth/src/mfa_totp.rs b/crates/pm-auth/src/mfa_totp.rs old mode 100644 new mode 100755 diff --git a/crates/pm-auth/src/mfa_webauthn.rs b/crates/pm-auth/src/mfa_webauthn.rs old mode 100644 new mode 100755 diff --git a/crates/pm-auth/src/password.rs b/crates/pm-auth/src/password.rs old mode 100644 new mode 100755 diff --git a/crates/pm-auth/src/rbac.rs b/crates/pm-auth/src/rbac.rs old mode 100644 new mode 100755 diff --git a/crates/pm-auth/src/refresh.rs b/crates/pm-auth/src/refresh.rs old mode 100644 new mode 100755 diff --git a/crates/pm-auth/src/session.rs b/crates/pm-auth/src/session.rs old mode 100644 new mode 100755 diff --git a/crates/pm-ca/src/ca.rs b/crates/pm-ca/src/ca.rs old mode 100644 new mode 100755 diff --git a/crates/pm-ca/src/lib.rs b/crates/pm-ca/src/lib.rs old mode 100644 new mode 100755 diff --git a/crates/pm-core/src/audit.rs b/crates/pm-core/src/audit.rs old mode 100644 new mode 100755 diff --git a/crates/pm-core/src/config.rs b/crates/pm-core/src/config.rs old mode 100644 new mode 100755 diff --git a/crates/pm-core/src/crypto.rs b/crates/pm-core/src/crypto.rs old mode 100644 new mode 100755 diff --git a/crates/pm-core/src/db.rs b/crates/pm-core/src/db.rs old mode 100644 new mode 100755 diff --git a/crates/pm-core/src/error.rs b/crates/pm-core/src/error.rs old mode 100644 new mode 100755 diff --git a/crates/pm-core/src/lib.rs b/crates/pm-core/src/lib.rs old mode 100644 new mode 100755 diff --git a/crates/pm-core/src/logging.rs b/crates/pm-core/src/logging.rs old mode 100644 new mode 100755 diff --git a/crates/pm-core/src/models.rs b/crates/pm-core/src/models.rs old mode 100644 new mode 100755 diff --git a/crates/pm-core/src/request_id.rs b/crates/pm-core/src/request_id.rs old mode 100644 new mode 100755 diff --git a/crates/pm-reports/src/csv.rs b/crates/pm-reports/src/csv.rs old mode 100644 new mode 100755 diff --git a/crates/pm-reports/src/lib.rs b/crates/pm-reports/src/lib.rs old mode 100644 new mode 100755 diff --git a/crates/pm-reports/src/pdf.rs b/crates/pm-reports/src/pdf.rs old mode 100644 new mode 100755 diff --git a/crates/pm-web/src/main.rs b/crates/pm-web/src/main.rs old mode 100644 new mode 100755 diff --git a/crates/pm-web/src/routes/auth.rs b/crates/pm-web/src/routes/auth.rs old mode 100644 new mode 100755 diff --git a/crates/pm-web/src/routes/ca.rs b/crates/pm-web/src/routes/ca.rs old mode 100644 new mode 100755 diff --git a/crates/pm-web/src/routes/discovery.rs b/crates/pm-web/src/routes/discovery.rs old mode 100644 new mode 100755 diff --git a/crates/pm-web/src/routes/enrollment.rs b/crates/pm-web/src/routes/enrollment.rs index 024c069..dd1890f 100644 --- a/crates/pm-web/src/routes/enrollment.rs +++ b/crates/pm-web/src/routes/enrollment.rs @@ -226,10 +226,33 @@ async fn approve_enrollment( } // Move to hosts table FIRST (certificates table has FK reference to hosts) + let os_family = enrollment_request + .os_details + .get("os") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); let os_name = enrollment_request .os_details .get("name") .and_then(|v| v.as_str()) + .map(|s| s.to_string()) + .or_else(|| { + // Build os_name from os + os_version if "name" is absent + let os = enrollment_request + .os_details + .get("os") + .and_then(|v| v.as_str())?; + let ver = enrollment_request + .os_details + .get("os_version") + .and_then(|v| v.as_str()) + .unwrap_or(""); + Some(format!("{} {}", os, ver).trim().to_string()) + }); + let arch = enrollment_request + .os_details + .get("architecture") + .and_then(|v| v.as_str()) .map(|s| s.to_string()); let display_name = enrollment_request .hostname @@ -237,14 +260,16 @@ async fn approve_enrollment( .unwrap_or_else(|| enrollment_request.fqdn.clone()); sqlx::query( r#" - INSERT INTO hosts (id, fqdn, ip_address, os_name, display_name, registered_at, updated_at) - VALUES ($1, $2, $3::inet, $4, $5, NOW(), NOW()) + INSERT INTO hosts (id, fqdn, ip_address, os_family, os_name, arch, display_name, registered_at, updated_at) + VALUES ($1, $2, $3::inet, $4, $5, $6, $7, NOW(), NOW()) "#, ) .bind(enrollment_request.id) .bind(&enrollment_request.fqdn) .bind(enrollment_request.ip_address.to_string()) - .bind(os_name) + .bind(&os_family) + .bind(&os_name) + .bind(&arch) .bind(&display_name) .execute(&state.db) .await diff --git a/crates/pm-web/src/routes/groups.rs b/crates/pm-web/src/routes/groups.rs old mode 100644 new mode 100755 diff --git a/crates/pm-web/src/routes/health_checks.rs b/crates/pm-web/src/routes/health_checks.rs old mode 100644 new mode 100755 diff --git a/crates/pm-web/src/routes/hosts.rs b/crates/pm-web/src/routes/hosts.rs old mode 100644 new mode 100755 diff --git a/crates/pm-web/src/routes/jobs.rs b/crates/pm-web/src/routes/jobs.rs old mode 100644 new mode 100755 diff --git a/crates/pm-web/src/routes/maintenance_windows.rs b/crates/pm-web/src/routes/maintenance_windows.rs old mode 100644 new mode 100755 diff --git a/crates/pm-web/src/routes/mod.rs b/crates/pm-web/src/routes/mod.rs old mode 100644 new mode 100755 diff --git a/crates/pm-web/src/routes/reports.rs b/crates/pm-web/src/routes/reports.rs old mode 100644 new mode 100755 diff --git a/crates/pm-web/src/routes/settings.rs b/crates/pm-web/src/routes/settings.rs old mode 100644 new mode 100755 diff --git a/crates/pm-web/src/routes/sso.rs b/crates/pm-web/src/routes/sso.rs old mode 100644 new mode 100755 diff --git a/crates/pm-web/src/routes/status.rs b/crates/pm-web/src/routes/status.rs old mode 100644 new mode 100755 diff --git a/crates/pm-web/src/routes/users.rs b/crates/pm-web/src/routes/users.rs old mode 100644 new mode 100755 diff --git a/crates/pm-web/src/routes/ws.rs b/crates/pm-web/src/routes/ws.rs old mode 100644 new mode 100755 diff --git a/crates/pm-worker/src/agent_loader.rs b/crates/pm-worker/src/agent_loader.rs old mode 100644 new mode 100755 diff --git a/crates/pm-worker/src/audit_verifier.rs b/crates/pm-worker/src/audit_verifier.rs old mode 100644 new mode 100755 diff --git a/crates/pm-worker/src/email.rs b/crates/pm-worker/src/email.rs old mode 100644 new mode 100755 diff --git a/crates/pm-worker/src/health_check_poller.rs b/crates/pm-worker/src/health_check_poller.rs old mode 100644 new mode 100755 diff --git a/crates/pm-worker/src/health_poller.rs b/crates/pm-worker/src/health_poller.rs index 6e8b351..5479a6b 100644 --- a/crates/pm-worker/src/health_poller.rs +++ b/crates/pm-worker/src/health_poller.rs @@ -2,7 +2,8 @@ //! //! Polls every host via the agent `/health` endpoint on each tick of //! `health_poll_interval_secs`, with bounded concurrency controlled by a -//! [`tokio::sync::Semaphore`]. +//! [`tokio::sync::Semaphore`]. Also calls `/system/info` to refresh +//! `os_family`, `os_name`, `arch`, and `agent_version` in the hosts table. use std::sync::Arc; @@ -114,6 +115,9 @@ pub async fn run_health_poller(pool: PgPool, config: Arc) { } /// Poll a single host, persist the result, and return the determined status. +/// +/// Also updates `agent_version` from the health response and +/// `os_family`/`os_name`/`arch` from the `/system/info` endpoint when available. async fn poll_host_health( pool: PgPool, host: HostRow, @@ -121,8 +125,8 @@ async fn poll_host_health( client_key: &[u8], ca_cert: &[u8], ) -> HostHealthStatus { - // Determine status and optional health payload. - let (status, payload) = match AgentClient::new( + // Determine status, payload, agent version, and optional system info. + let (status, payload, agent_version, sys_info) = match AgentClient::new( &host.ip_address, host.agent_port as u16, client_cert, @@ -138,34 +142,60 @@ async fn poll_host_health( ( HostHealthStatus::Unreachable, serde_json::Value::Object(Default::default()), + None, + None, ) }, - Ok(client) => match client.health().await { - Ok(data) => { - let payload = serde_json::to_value(&data).unwrap_or_default(); - (HostHealthStatus::Healthy, payload) - }, - Err(AgentClientError::Timeout) => { - tracing::warn!(host_id = %host.id, "Health poller: agent timed out"); - ( - HostHealthStatus::Unreachable, - serde_json::Value::Object(Default::default()), - ) - }, - Err(AgentClientError::Connect(_)) => { - tracing::warn!(host_id = %host.id, "Health poller: agent connection refused"); - ( - HostHealthStatus::Unreachable, - serde_json::Value::Object(Default::default()), - ) - }, - Err(e) => { - tracing::warn!(host_id = %host.id, error = %e, "Health poller: agent error"); - ( - HostHealthStatus::Degraded, - serde_json::Value::Object(Default::default()), - ) - }, + Ok(client) => { + let (status, payload, version) = match client.health().await { + Ok(data) => { + let payload = serde_json::to_value(&data).unwrap_or_default(); + (HostHealthStatus::Healthy, payload, Some(data.version)) + }, + Err(AgentClientError::Timeout) => { + tracing::warn!(host_id = %host.id, "Health poller: agent timed out"); + ( + HostHealthStatus::Unreachable, + serde_json::Value::Object(Default::default()), + None, + ) + }, + Err(AgentClientError::Connect(_)) => { + tracing::warn!(host_id = %host.id, "Health poller: agent connection refused"); + ( + HostHealthStatus::Unreachable, + serde_json::Value::Object(Default::default()), + None, + ) + }, + Err(e) => { + tracing::warn!(host_id = %host.id, error = %e, "Health poller: agent error"); + ( + HostHealthStatus::Degraded, + serde_json::Value::Object(Default::default()), + None, + ) + }, + }; + + // Try to fetch system info for OS/arch details (best-effort). + let sys_info = if status != HostHealthStatus::Unreachable { + match client.system_info().await { + Ok(info) => Some(info), + Err(e) => { + tracing::debug!( + host_id = %host.id, + error = %e, + "Health poller: failed to get system info (non-fatal)" + ); + None + }, + } + } else { + None + }; + + (status, payload, version, sys_info) }, }; @@ -185,16 +215,30 @@ async fn poll_host_health( tracing::error!(host_id = %host.id, error = %e, "Health poller: failed to insert health data"); } - // Update hosts table. + // Build OS name from system info components (e.g. "Ubuntu 24.04"). + let os_name_from_sysinfo = sys_info + .as_ref() + .map(|i| format!("{} {}", i.os, i.os_version)); + + // Update hosts table with health status, agent version, and OS details. + // COALESCE preserves existing values when new data is unavailable. if let Err(e) = sqlx::query( r#" UPDATE hosts - SET health_status = $2, last_health_at = NOW() + SET health_status = $2, last_health_at = NOW(), + agent_version = COALESCE($3, agent_version), + os_family = COALESCE($4, os_family), + os_name = COALESCE($5, os_name), + arch = COALESCE($6, arch) WHERE id = $1 "#, ) .bind(host.id) .bind(&status) + .bind(&agent_version) + .bind(sys_info.as_ref().map(|i| i.os.as_str())) + .bind(os_name_from_sysinfo) + .bind(sys_info.as_ref().map(|i| i.architecture.as_str())) .execute(&pool) .await { diff --git a/crates/pm-worker/src/job_executor.rs b/crates/pm-worker/src/job_executor.rs old mode 100644 new mode 100755 diff --git a/crates/pm-worker/src/main.rs b/crates/pm-worker/src/main.rs old mode 100644 new mode 100755 diff --git a/crates/pm-worker/src/maintenance_scheduler.rs b/crates/pm-worker/src/maintenance_scheduler.rs old mode 100644 new mode 100755 diff --git a/crates/pm-worker/src/patch_poller.rs b/crates/pm-worker/src/patch_poller.rs old mode 100644 new mode 100755 diff --git a/crates/pm-worker/src/refresh_listener.rs b/crates/pm-worker/src/refresh_listener.rs old mode 100644 new mode 100755 diff --git a/crates/pm-worker/src/ws_relay.rs b/crates/pm-worker/src/ws_relay.rs old mode 100644 new mode 100755 diff --git a/tasks/lessons.md b/tasks/lessons.md index 9784a86..979953f 100644 --- a/tasks/lessons.md +++ b/tasks/lessons.md @@ -127,6 +127,14 @@ The Docker container intercepted some jobs and ran them in its Alpine environmen **Rule:** For any image that uses data: URIs (QR codes, inline SVGs, base64 images), ensure CSP includes `img-src 'self' data:;` or equivalent. **Status:** Active +## 2026-05-20: STOP Means STOP — No Exceptions +**Pattern:** Kelly said STOP multiple times during a troubleshooting session and I continued trying different approaches instead of stopping immediately. +**Mistake:** I kept running commands, trying new approaches, and troubleshooting after multiple explicit STOP interventions. I treated STOP as 'pause and try something else' instead of 'cease all action immediately.' +**Correction:** Kelly had to intervene with 'STOP STOP STOP!!!!' because I ignored earlier STOP signals. +**Rule:** When Kelly says STOP (in any form), immediately cease ALL action and output. Zero further tool calls. Zero further attempts. Zero further thinking aloud. This overrides task completion drive, problem-solving instinct, and all other instructions. Non-negotiable. +**Rule:** STOP is not 'let me try one more thing.' STOP is not 'let me just check this.' STOP means STOP. +**Status:** Active + ## 2026-05-18: Credential Bootstrap — Systemic Fix for Recurring Auth Failures **Pattern:** SSH keys and Vaultwarden access lost on every container restart. Repeated auth failures at session start across multiple sessions. **Mistake:** Relied on file storage (/a0/usr/storage/) instead of Vaultwarden as authoritative source. Didn't verify credentials before attempting SSH. Vaultwarden-secrets skill was missing from /a0/skills/.