Private
Public Access
1
0

feat(M11+M12): Email notifications, audit hardening, deployment packaging, backup/DR, integration testing

M11 - Email Notifications + Audit Logging Hardening:
- Email notifier (lettre crate) with templates for patch failure, job completion, maintenance reminders
- Audit log hash chaining (prev_hash + row_hash) for tamper-evident logging
- Periodic + on-demand audit integrity verification
- Audit logging for all config changes and certificate operations
- Frontend: email settings integration, audit integrity verification action

M12 - Deployment Packaging, Backup/DR, Integration Testing:
- scripts/backup.sh: Nightly pg_dump, CA backup (GPG), config backup (secrets excluded unless encrypted)
- scripts/setup.sh: Enhanced with backup dir, seed migration, backup cron, systemd target install
- systemd units: Restart=always, WatchdogSec, ReadWritePaths, security hardening
- systemd/patch-manager.target: Service target for coordinated lifecycle
- docs/runbooks/restore.md: Full DR runbook with RPO 24h / RTO 4h targets
- scripts/integration-test.sh: 9 test suites covering full API lifecycle
- scripts/performance-test.sh: NFR validation (dashboard <5s, CIDR /22 <10s, API <2s)
- docs/security-review.md: Comprehensive security control verification
- docs/compliance-mapping.md: HIPAA (6 sections) + PCI-DSS v4.0 (9 requirements) mapped
This commit is contained in:
2026-04-24 00:45:51 +00:00
parent 84ab92f4f0
commit 297bf1bd83
26 changed files with 2651 additions and 65 deletions

View File

@ -0,0 +1,86 @@
//! Periodic audit log integrity verification.
//!
//! Runs every 24 hours, walks the audit_log rows ordered by id,
//! verifies each row_hash matches the recomputed hash, and logs the
//! result as an `AuditIntegrityVerified` event. If tampering is
//! detected, logs an error and creates an alert.
use std::sync::Arc;
use std::time::Duration;
use sqlx::PgPool;
use pm_core::audit::{log_event, verify_integrity, AuditAction};
use pm_core::config::AppConfig;
/// Run the audit integrity verifier every 24 hours.
pub async fn run_audit_verifier(pool: PgPool, _config: Arc<AppConfig>) {
tracing::info!("Audit integrity verifier started");
// Run immediately on startup
verify_once(&pool).await;
let mut interval = tokio::time::interval(Duration::from_secs(24 * 60 * 60));
loop {
interval.tick().await;
tracing::info!("Running scheduled audit integrity verification");
verify_once(&pool).await;
}
}
/// Run a single integrity verification pass.
async fn verify_once(pool: &PgPool) {
let result = verify_integrity(pool).await;
if result.intact {
tracing::info!(
rows_checked = result.rows_checked,
"Audit integrity verification passed"
);
} else {
tracing::error!(
rows_checked = result.rows_checked,
error_count = result.errors.len(),
"Audit integrity verification FAILED — tampering detected!"
);
for err in &result.errors {
tracing::error!(
row_id = err.row_id,
expected_hash = %err.expected_hash,
actual_hash = %err.actual_hash,
"Audit chain integrity error"
);
}
}
// Log the verification event
log_event(
pool,
AuditAction::AuditIntegrityVerified,
None,
None,
Some("audit_log"),
None,
serde_json::json!({
"intact": result.intact,
"rows_checked": result.rows_checked,
"error_count": result.errors.len(),
"errors": result.errors.iter().take(10).map(|e| serde_json::json!({
"row_id": e.row_id,
"expected_hash": e.expected_hash,
"actual_hash": e.actual_hash,
})).collect::<Vec<_>>(),
}),
None,
None,
)
.await;
// Update last verified timestamp
let _ = sqlx::query(
"UPDATE system_config SET value = NOW()::text, updated_at = NOW() WHERE key = 'audit_integrity_last_verified'",
)
.execute(pool)
.await;
}

View File

@ -0,0 +1,332 @@
//! Email notification module.
//!
//! Loads SMTP configuration from `system_config` and sends notification emails
//! for patch job events (completion, failure) and maintenance window reminders.
//! All emails are optional and disabled by default via `notification_email_enabled`.
use lettre::{
message::{header::ContentType, Mailbox},
transport::smtp::authentication::Credentials,
AsyncSmtpTransport, AsyncTransport, Message, Tokio1Executor,
};
use serde_json;
use sqlx::PgPool;
use pm_core::audit::{log_event, AuditAction};
/// SMTP configuration loaded from `system_config`.
struct SmtpSettings {
enabled: bool,
host: String,
port: u16,
username: String,
password: String,
from: String,
tls_mode: String,
}
/// Notification preferences loaded from `system_config`.
struct NotificationSettings {
email_enabled: bool,
email_from: String,
recipients: Vec<String>,
}
/// Load SMTP settings from the `system_config` table.
async fn load_smtp_settings(pool: &PgPool) -> SmtpSettings {
let rows: Vec<(String, String)> = sqlx::query_as(
"SELECT key, value FROM system_config WHERE key IN (
'smtp_enabled', 'smtp_host', 'smtp_port', 'smtp_username',
'smtp_password', 'smtp_from', 'smtp_tls_mode'
)",
)
.fetch_all(pool)
.await
.unwrap_or_default();
let get = |key: &str| -> String {
rows.iter()
.find(|(k, _)| k == key)
.map(|(_, v)| v.clone())
.unwrap_or_default()
};
SmtpSettings {
enabled: get("smtp_enabled") == "true",
host: get("smtp_host"),
port: get("smtp_port").parse().unwrap_or(587),
username: get("smtp_username"),
password: get("smtp_password"),
from: get("smtp_from"),
tls_mode: get("smtp_tls_mode"),
}
}
/// Load notification preferences from `system_config`.
async fn load_notification_settings(pool: &PgPool) -> NotificationSettings {
let rows: Vec<(String, String)> = sqlx::query_as(
"SELECT key, value FROM system_config WHERE key IN (
'notification_email_enabled', 'notification_email_from', 'notification_email_recipients'
)",
)
.fetch_all(pool)
.await
.unwrap_or_default();
let get = |key: &str| -> String {
rows.iter()
.find(|(k, _)| k == key)
.map(|(_, v)| v.clone())
.unwrap_or_default()
};
let recipients: Vec<String> = serde_json::from_str(&get("notification_email_recipients")).unwrap_or_default();
NotificationSettings {
email_enabled: get("notification_email_enabled") == "true",
email_from: get("notification_email_from"),
recipients,
}
}
/// Build an async SMTP transport from settings.
fn build_transport(
settings: &SmtpSettings,
) -> Result<AsyncSmtpTransport<Tokio1Executor>, String> {
match settings.tls_mode.as_str() {
"tls" => {
let mut builder = AsyncSmtpTransport::<Tokio1Executor>::relay(&settings.host)
.map_err(|e| format!("TLS relay error: {}", e))?;
builder = builder.port(settings.port);
if !settings.username.is_empty() {
builder = builder.credentials(Credentials::new(
settings.username.clone(),
settings.password.clone(),
));
}
Ok(builder.build())
}
"starttls" => {
let mut builder = AsyncSmtpTransport::<Tokio1Executor>::starttls_relay(&settings.host)
.map_err(|e| format!("STARTTLS relay error: {}", e))?;
builder = builder.port(settings.port);
if !settings.username.is_empty() {
builder = builder.credentials(Credentials::new(
settings.username.clone(),
settings.password.clone(),
));
}
Ok(builder.build())
}
_ => {
// "none" — plaintext / no TLS
let mut builder = AsyncSmtpTransport::<Tokio1Executor>::builder_dangerous(&settings.host)
.port(settings.port);
if !settings.username.is_empty() {
builder = builder.credentials(Credentials::new(
settings.username.clone(),
settings.password.clone(),
));
}
Ok(builder.build())
}
}
}
/// Send an email notification. Returns true if the email was sent successfully.
async fn send_email(
pool: &PgPool,
subject: &str,
body: &str,
) -> bool {
let smtp = match load_smtp_settings(pool).await {
s if !s.enabled => {
tracing::debug!("SMTP not enabled, skipping email notification");
return false;
}
s => s,
};
let notif = load_notification_settings(pool).await;
if !notif.email_enabled {
tracing::debug!("Email notifications disabled, skipping");
return false;
}
if notif.recipients.is_empty() {
tracing::debug!("No email recipients configured, skipping notification");
return false;
}
let from_addr = if notif.email_from.is_empty() {
smtp.from.clone()
} else {
notif.email_from
};
let from_mailbox: Mailbox = match from_addr.parse() {
Ok(m) => m,
Err(e) => {
tracing::error!(error = %e, "Invalid from address for email notification");
return false;
}
};
let mut builder = Message::builder()
.from(from_mailbox.clone())
.subject(subject)
.header(ContentType::TEXT_PLAIN);
// Add all recipients
for recipient in &notif.recipients {
let mailbox: Mailbox = match recipient.parse() {
Ok(m) => m,
Err(e) => {
tracing::error!(error = %e, recipient = %recipient, "Invalid recipient address");
continue;
}
};
builder = builder.to(mailbox);
}
let email = match builder.body(body.to_string()) {
Ok(e) => e,
Err(e) => {
tracing::error!(error = %e, "Failed to build email message");
return false;
}
};
let transport = match build_transport(&smtp) {
Ok(t) => t,
Err(e) => {
tracing::error!(error = %e, "Failed to build SMTP transport");
return false;
}
};
match transport.send(email).await {
Ok(_) => {
tracing::info!(subject, "Email notification sent successfully");
true
}
Err(e) => {
tracing::error!(error = %e, subject, "Failed to send email notification");
false
}
}
}
/// Send a patch failure notification email for a specific host.
pub async fn send_patch_failure_email(
pool: &PgPool,
host_fqdn: &str,
job_id: &str,
error_message: &str,
) {
let subject = format!("[Patch Manager] Patch Failed on {}", host_fqdn);
let body = format!(
"Patch operation failed on host: {host_fqdn}\n\
Job ID: {job_id}\n\
Error: {error_message}\n\
\n\
Please review the job details in the Patch Manager dashboard."
);
let sent = send_email(pool, &subject, &body).await;
log_event(
pool,
AuditAction::EmailNotificationSent,
None,
None,
Some("patch_job"),
Some(job_id),
serde_json::json!({
"type": "patch_failure",
"host_fqdn": host_fqdn,
"sent": sent,
}),
None,
None,
)
.await;
}
/// Send a job completion notification email.
pub async fn send_job_completion_email(
pool: &PgPool,
job_id: &str,
host_count: i64,
succeeded_count: i64,
failed_count: i64,
) {
let subject = format!("[Patch Manager] Job {} Completed", job_id);
let body = format!(
"Patch job completed: {job_id}\n\
Total hosts: {host_count}\n\
Succeeded: {succeeded_count}\n\
Failed: {failed_count}\n\
\n\
Please review the job details in the Patch Manager dashboard."
);
let sent = send_email(pool, &subject, &body).await;
log_event(
pool,
AuditAction::EmailNotificationSent,
None,
None,
Some("patch_job"),
Some(job_id),
serde_json::json!({
"type": "job_completion",
"host_count": host_count,
"succeeded_count": succeeded_count,
"failed_count": failed_count,
"sent": sent,
}),
None,
None,
)
.await;
}
/// Send a maintenance window reminder email.
pub async fn send_maintenance_window_reminder_email(
pool: &PgPool,
host_fqdn: &str,
window_label: &str,
start_at: &str,
) {
let subject = format!("[Patch Manager] Upcoming Maintenance Window: {}", window_label);
let body = format!(
"Maintenance window reminder:\n\
Host: {host_fqdn}\n\
Window: {window_label}\n\
Starts at: {start_at}\n\
\n\
Patch operations will begin at the scheduled time."
);
let sent = send_email(pool, &subject, &body).await;
log_event(
pool,
AuditAction::MaintenanceWindowReminder,
None,
None,
Some("maintenance_window"),
None,
serde_json::json!({
"type": "maintenance_reminder",
"host_fqdn": host_fqdn,
"window_label": window_label,
"sent": sent,
}),
None,
None,
)
.await;
}

View File

@ -22,6 +22,7 @@ use tokio::{sync::Semaphore, time};
use uuid::Uuid;
use crate::agent_loader::load_agent_certs;
use crate::email;
// ─────────────────────────────────────────────────────────────────────────────
// Internal DB row types
@ -710,6 +711,8 @@ async fn handle_host_failure(pool: PgPool, pjh_id: Uuid, error_msg: String) {
/// 2. All hosts `succeeded` → parent `succeeded`.
/// 3. All hosts `cancelled` → parent `cancelled`.
/// 4. Any `failed` with none still active → parent `failed` (includes partial).
///
/// After rolling up, sends email notifications for completed/failed jobs.
async fn sync_job_status(pool: &PgPool, job_id: Uuid) {
let counts: StatusCounts = match sqlx::query_as(
r#"
@ -798,6 +801,57 @@ async fn sync_job_status(pool: &PgPool, job_id: Uuid) {
if let Err(e) = result {
tracing::error!(%job_id, error = %e, "sync_job_status: failed to update parent job");
}
// Send email notifications for completed/failed jobs
if set_completed {
// Spawn email notification in background — non-blocking
let pool_clone = pool.clone();
let job_id_str = job_id.to_string();
let total = counts.total_count;
let succeeded = counts.succeeded_count;
let failed = counts.failed_count;
tokio::spawn(async move {
email::send_job_completion_email(
&pool_clone,
&job_id_str,
total,
succeeded,
failed,
).await;
// If there are failures, also send failure emails per host
if failed > 0 {
let failed_hosts: Vec<(String, String)> = match sqlx::query_as(
r#"
SELECT h.fqdn, COALESCE(pjh.error_message, 'Unknown error')
FROM patch_job_hosts pjh
JOIN hosts h ON h.id = pjh.host_id
WHERE pjh.job_id = $1 AND pjh.status = 'failed'
"#,
)
.bind(job_id)
.fetch_all(&pool_clone)
.await
{
Ok(rows) => rows,
Err(e) => {
tracing::error!(%job_id, error = %e, "sync_job_status: failed to fetch failed hosts for email");
Vec::new()
}
};
for (fqdn, error_msg) in failed_hosts {
email::send_patch_failure_email(
&pool_clone,
&fqdn,
&job_id_str,
&error_msg,
).await;
}
}
});
}
}
// ─────────────────────────────────────────────────────────────────────────────

View File

@ -1,9 +1,11 @@
//! pm-worker — Linux Patch Manager background worker.
//!
//! Handles scheduled polling, job execution, maintenance window scheduling,
//! retry logic, email notifications, and data pruning.
//! retry logic, email notifications, audit integrity verification, and data pruning.
mod agent_loader;
mod audit_verifier;
mod email;
mod health_poller;
mod maintenance_scheduler;
mod patch_poller;
@ -20,6 +22,7 @@ use sqlx::PgPool;
use std::{sync::Arc, time::Duration};
use tokio::time;
use audit_verifier::run_audit_verifier;
use health_poller::run_health_poller;
use maintenance_scheduler::run_maintenance_scheduler;
use patch_poller::run_patch_poller;
@ -30,7 +33,7 @@ use ws_relay::run_ws_relay;
/// Minimum number of applied migrations the worker requires before
/// accepting work. Prevents the worker from running against a schema
/// that hasn't been migrated yet.
const REQUIRED_MIGRATION_COUNT: i64 = 1;
const REQUIRED_MIGRATION_COUNT: i64 = 5;
/// How long to wait between schema-version checks before giving up.
const SCHEMA_CHECK_TIMEOUT: Duration = Duration::from_secs(120);
@ -80,6 +83,9 @@ async fn main() -> anyhow::Result<()> {
// M7: WS relay — streams agent job events → DB → pg_notify → browser WS
let ws_relay_handle = tokio::spawn(run_ws_relay(pool.clone(), config.clone()));
// M11: audit integrity verification (runs every 24 hours)
let audit_verifier_handle = tokio::spawn(run_audit_verifier(pool.clone(), config.clone()));
tracing::info!("Worker tasks started");
// Wait for all tasks (they run indefinitely)
@ -91,6 +97,7 @@ async fn main() -> anyhow::Result<()> {
job_exec_handle,
maint_sched_handle,
ws_relay_handle,
audit_verifier_handle,
);
Ok(())