feat: M6 maintenance windows + M7 WebSocket relay (real-time job status)
M6 - Maintenance Windows: - routes/maintenance_windows.rs: full CRUD API - migrations/004_maintenance_windows.sql - frontend/MaintenanceWindowsPage.tsx - HostDetailPage.tsx: maintenance window config panel M7 - WebSocket Relay: - pm-web: POST /api/v1/ws/ticket (JWT-auth, single-use, 60s TTL) - pm-web: WS /api/v1/ws/jobs?ticket=... (PgListener -> browser push) - pm-web: DashMap<String,WsTicket> in AppState, 30s cleanup task - pm-worker: ws_relay.rs subscribes to agent WS, updates patch_job_hosts, fires pg_notify(job_update) for real-time fan-out - frontend: useJobWebSocket hook with auto-reconnect + exponential backoff - frontend: JobsPage live updates with WS status indicator - types: JobWsEvent interface - api/client: wsApi.createTicket() All tasks marked complete in tasks/todo.md cargo build: zero errors, zero warnings
This commit is contained in:
164
crates/pm-worker/src/maintenance_scheduler.rs
Normal file
164
crates/pm-worker/src/maintenance_scheduler.rs
Normal file
@ -0,0 +1,164 @@
|
||||
//! Maintenance window scheduler.
|
||||
//!
|
||||
//! Polls every 60 seconds and, for each enabled maintenance window that is
|
||||
//! currently open, dispatches any queued non-immediate patch jobs associated
|
||||
//! with the window's host.
|
||||
//!
|
||||
//! A window is considered "open" when:
|
||||
//! - `once` — `start_at <= NOW() < start_at + duration_minutes * '1 minute'`
|
||||
//! - `daily` — current UTC time-of-day is within the window's daily slot
|
||||
//! - `weekly` — same as daily, but only on the matching `recurrence_day` (0=Sun)
|
||||
//! - `monthly` — same as daily, but only on the matching `recurrence_day` (1-31)
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use pm_core::config::AppConfig;
|
||||
use sqlx::{FromRow, PgPool};
|
||||
use tokio::time;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::job_executor::process_job;
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Internal types
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, FromRow)]
|
||||
struct OpenWindowHost {
|
||||
host_id: Uuid,
|
||||
}
|
||||
|
||||
#[derive(Debug, FromRow)]
|
||||
struct QueuedJobId {
|
||||
job_id: Uuid,
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Public entry point
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Run the maintenance scheduler indefinitely.
|
||||
/// Spawned by `pm-worker/src/main.rs` alongside the job executor.
|
||||
pub async fn run_maintenance_scheduler(pool: PgPool, config: Arc<AppConfig>) {
|
||||
tracing::info!("Maintenance scheduler started");
|
||||
|
||||
// First tick fires immediately; consume it to align with job_executor.
|
||||
let mut ticker = time::interval(std::time::Duration::from_secs(60));
|
||||
ticker.tick().await;
|
||||
|
||||
loop {
|
||||
ticker.tick().await;
|
||||
tracing::debug!("Maintenance scheduler: checking open windows");
|
||||
dispatch_open_window_jobs(pool.clone(), config.clone()).await;
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Core dispatch logic
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Find all hosts with a currently-open maintenance window, then for each,
|
||||
/// find their queued non-immediate job entries and dispatch them.
|
||||
async fn dispatch_open_window_jobs(pool: PgPool, config: Arc<AppConfig>) {
|
||||
// ── 1. Find all host_ids with an open window right now ─────────────────
|
||||
let open_hosts: Vec<OpenWindowHost> = match sqlx::query_as(
|
||||
r#"
|
||||
SELECT DISTINCT mw.host_id
|
||||
FROM maintenance_windows mw
|
||||
WHERE mw.enabled = TRUE
|
||||
AND (
|
||||
-- One-time: absolute window
|
||||
( mw.recurrence = 'once'
|
||||
AND mw.start_at <= NOW()
|
||||
AND NOW() < mw.start_at + (mw.duration_minutes * INTERVAL '1 minute')
|
||||
)
|
||||
OR
|
||||
-- Daily: time-of-day slot, any day
|
||||
( mw.recurrence = 'daily'
|
||||
AND (NOW() AT TIME ZONE 'UTC')::time >= (mw.start_at AT TIME ZONE 'UTC')::time
|
||||
AND (NOW() AT TIME ZONE 'UTC')::time < ((mw.start_at AT TIME ZONE 'UTC')::time
|
||||
+ (mw.duration_minutes * INTERVAL '1 minute'))
|
||||
)
|
||||
OR
|
||||
-- Weekly: matching day-of-week + time-of-day slot
|
||||
( mw.recurrence = 'weekly'
|
||||
AND EXTRACT(DOW FROM NOW() AT TIME ZONE 'UTC') = mw.recurrence_day
|
||||
AND (NOW() AT TIME ZONE 'UTC')::time >= (mw.start_at AT TIME ZONE 'UTC')::time
|
||||
AND (NOW() AT TIME ZONE 'UTC')::time < ((mw.start_at AT TIME ZONE 'UTC')::time
|
||||
+ (mw.duration_minutes * INTERVAL '1 minute'))
|
||||
)
|
||||
OR
|
||||
-- Monthly: matching day-of-month + time-of-day slot
|
||||
( mw.recurrence = 'monthly'
|
||||
AND EXTRACT(DAY FROM NOW() AT TIME ZONE 'UTC') = mw.recurrence_day
|
||||
AND (NOW() AT TIME ZONE 'UTC')::time >= (mw.start_at AT TIME ZONE 'UTC')::time
|
||||
AND (NOW() AT TIME ZONE 'UTC')::time < ((mw.start_at AT TIME ZONE 'UTC')::time
|
||||
+ (mw.duration_minutes * INTERVAL '1 minute'))
|
||||
)
|
||||
)
|
||||
"#,
|
||||
)
|
||||
.fetch_all(&pool)
|
||||
.await
|
||||
{
|
||||
Ok(hosts) => hosts,
|
||||
Err(e) => {
|
||||
tracing::error!(error = %e, "dispatch_open_window_jobs: open-hosts query failed");
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
if open_hosts.is_empty() {
|
||||
tracing::debug!("Maintenance scheduler: no open windows this cycle");
|
||||
return;
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
open_host_count = open_hosts.len(),
|
||||
"Maintenance scheduler: found hosts with open windows"
|
||||
);
|
||||
|
||||
// ── 2. For each open host, find distinct queued non-immediate job IDs ──
|
||||
for host in open_hosts {
|
||||
let job_ids: Vec<QueuedJobId> = match sqlx::query_as(
|
||||
r#"
|
||||
SELECT DISTINCT pjh.job_id
|
||||
FROM patch_job_hosts pjh
|
||||
JOIN patch_jobs j ON j.id = pjh.job_id
|
||||
WHERE pjh.host_id = $1
|
||||
AND pjh.status = 'queued'
|
||||
AND j.immediate = FALSE
|
||||
AND j.status != 'cancelled'
|
||||
AND (pjh.retry_next_at IS NULL OR pjh.retry_next_at <= NOW())
|
||||
"#,
|
||||
)
|
||||
.bind(host.host_id)
|
||||
.fetch_all(&pool)
|
||||
.await
|
||||
{
|
||||
Ok(ids) => ids,
|
||||
Err(e) => {
|
||||
tracing::error!(
|
||||
error = %e,
|
||||
host_id = %host.host_id,
|
||||
"dispatch_open_window_jobs: queued jobs query failed"
|
||||
);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
for job in job_ids {
|
||||
tracing::info!(
|
||||
job_id = %job.job_id,
|
||||
host_id = %host.host_id,
|
||||
"Maintenance scheduler: dispatching non-immediate job (window open)"
|
||||
);
|
||||
|
||||
let (p, c) = (pool.clone(), config.clone());
|
||||
let job_id = job.job_id;
|
||||
tokio::spawn(async move {
|
||||
process_job(p, c, job_id).await;
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user