Private
Public Access
1
0

feat: M6 maintenance windows + M7 WebSocket relay (real-time job status)

M6 - Maintenance Windows:
- routes/maintenance_windows.rs: full CRUD API
- migrations/004_maintenance_windows.sql
- frontend/MaintenanceWindowsPage.tsx
- HostDetailPage.tsx: maintenance window config panel

M7 - WebSocket Relay:
- pm-web: POST /api/v1/ws/ticket (JWT-auth, single-use, 60s TTL)
- pm-web: WS /api/v1/ws/jobs?ticket=... (PgListener -> browser push)
- pm-web: DashMap<String,WsTicket> in AppState, 30s cleanup task
- pm-worker: ws_relay.rs subscribes to agent WS, updates patch_job_hosts,
  fires pg_notify(job_update) for real-time fan-out
- frontend: useJobWebSocket hook with auto-reconnect + exponential backoff
- frontend: JobsPage live updates with WS status indicator
- types: JobWsEvent interface
- api/client: wsApi.createTicket()

All tasks marked complete in tasks/todo.md
cargo build: zero errors, zero warnings
This commit is contained in:
2026-04-23 17:42:51 +00:00
parent 6f9c6dc881
commit a5d52ffab0
21 changed files with 2833 additions and 36 deletions

View File

@ -28,3 +28,4 @@ uuid = { workspace = true }
ulid = { workspace = true }
chrono = { workspace = true }
ipnet = { workspace = true }
dashmap = { version = "6" }

View File

@ -10,6 +10,7 @@ use axum::{
routing::get,
Router,
};
use dashmap::DashMap;
use pm_core::{
config::AppConfig,
db,
@ -20,8 +21,13 @@ use pm_auth::{
jwt,
rbac::{AuthConfig, require_auth},
};
use routes::ws::WsTicket;
use serde_json::{json, Value};
use std::{net::SocketAddr, sync::Arc};
use std::{
net::SocketAddr,
sync::Arc,
time::Duration,
};
use tower_http::{
services::ServeDir,
trace::TraceLayer,
@ -34,6 +40,8 @@ pub struct AppState {
pub config: Arc<AppConfig>,
pub signing_key_pem: String,
pub auth_config: Arc<AuthConfig>,
/// In-memory store for single-use WebSocket authentication tickets.
pub ws_tickets: Arc<DashMap<String, WsTicket>>,
}
#[tokio::main]
@ -69,11 +77,32 @@ async fn main() -> anyhow::Result<()> {
let pool = db::init_pool(&config.database).await?;
db::run_migrations(&pool).await?;
let ws_tickets: Arc<DashMap<String, WsTicket>> = Arc::new(DashMap::new());
// Background task: purge expired WS tickets every 30 seconds.
{
let tickets = ws_tickets.clone();
tokio::spawn(async move {
let mut interval = tokio::time::interval(Duration::from_secs(30));
loop {
interval.tick().await;
let now = chrono::Utc::now();
let before = tickets.len();
tickets.retain(|_, v| v.expires_at > now);
let removed = before.saturating_sub(tickets.len());
if removed > 0 {
tracing::debug!(removed, "Purged expired WS tickets");
}
}
});
}
let state = AppState {
db: pool,
config: Arc::new(config.clone()),
signing_key_pem,
auth_config,
ws_tickets,
};
let app = build_router(state);
@ -109,6 +138,10 @@ pub fn build_router(state: AppState) -> Router {
.nest("/status", routes::status::router())
// Patch jobs
.nest("/jobs", routes::jobs::router())
// Maintenance windows (nested under hosts path param)
.nest("/hosts/:host_id/maintenance-windows", routes::maintenance_windows::router())
// WS ticket issuance (JWT-protected — ticket returned to browser, then used for WS upgrade)
.merge(routes::ws::ticket_router())
// Apply auth middleware to all the above
.route_layer(middleware::from_fn(move |req, next| {
let auth_config = auth_config.clone();
@ -121,6 +154,8 @@ pub fn build_router(state: AppState) -> Router {
.nest("/api/v1/auth", routes::auth::public_router())
// Protected API routes (JWT required)
.nest("/api/v1", protected_api)
// WebSocket browser endpoint — ticket-authenticated, outside JWT middleware
.merge(routes::ws::ws_router())
// Serve React SPA
.fallback_service(
ServeDir::new(&static_dir).append_index_html_on_directories(true),

View File

@ -0,0 +1,364 @@
//! Maintenance window management routes.
//!
//! GET /api/v1/hosts/{id}/maintenance-windows — list windows for host
//! POST /api/v1/hosts/{id}/maintenance-windows — create window for host
//! PUT /api/v1/hosts/{id}/maintenance-windows/{win_id} — update window
//! DELETE /api/v1/hosts/{id}/maintenance-windows/{win_id} — delete window
use axum::{
extract::{Path, State},
http::StatusCode,
response::Json,
routing::{get, put},
Router,
};
use pm_auth::rbac::AuthUser;
use pm_core::{
audit::{log_event, AuditAction},
models::{
CreateMaintenanceWindowRequest, MaintenanceWindow, UpdateMaintenanceWindowRequest,
},
};
use serde_json::{json, Value};
use uuid::Uuid;
use crate::AppState;
// ── Router ────────────────────────────────────────────────────────────────────
/// Mount as a nested router under `/hosts/:host_id/maintenance-windows`.
/// Axum will merge the `:host_id` path segment from the parent nest.
pub fn router() -> Router<AppState> {
Router::new()
.route("/", get(list_windows).post(create_window))
.route("/:win_id", put(update_window).delete(delete_window))
}
// ── Error helper ──────────────────────────────────────────────────────────────
#[inline]
fn err(
status: StatusCode,
code: &'static str,
message: impl Into<String>,
) -> (StatusCode, Json<Value>) {
(
status,
Json(json!({ "error": { "code": code, "message": message.into() } })),
)
}
// ── GET /api/v1/hosts/:host_id/maintenance-windows ────────────────────────────
async fn list_windows(
State(state): State<AppState>,
_auth: AuthUser,
Path(host_id): Path<Uuid>,
) -> Result<Json<Value>, (StatusCode, Json<Value>)> {
// Verify host exists.
let host_exists: bool =
sqlx::query_scalar("SELECT EXISTS(SELECT 1 FROM hosts WHERE id = $1)")
.bind(host_id)
.fetch_one(&state.db)
.await
.map_err(|e| {
tracing::error!(error = %e, %host_id, "list_windows: host existence check failed");
err(StatusCode::INTERNAL_SERVER_ERROR, "internal_error", "Database error")
})?;
if !host_exists {
return Err(err(StatusCode::NOT_FOUND, "not_found", "Host not found"));
}
let windows: Vec<MaintenanceWindow> = sqlx::query_as(
r#"
SELECT id, host_id, label, recurrence, start_at, duration_minutes,
recurrence_day, enabled, created_at, updated_at
FROM maintenance_windows
WHERE host_id = $1
ORDER BY created_at ASC
"#,
)
.bind(host_id)
.fetch_all(&state.db)
.await
.map_err(|e| {
tracing::error!(error = %e, %host_id, "list_windows: query failed");
err(StatusCode::INTERNAL_SERVER_ERROR, "internal_error", "Database error")
})?;
Ok(Json(json!({ "windows": windows })))
}
// ── POST /api/v1/hosts/:host_id/maintenance-windows ───────────────────────────
async fn create_window(
State(state): State<AppState>,
auth: AuthUser,
Path(host_id): Path<Uuid>,
Json(req): Json<CreateMaintenanceWindowRequest>,
) -> Result<Json<Value>, (StatusCode, Json<Value>)> {
// Validate: weekly requires recurrence_day 0-6
if req.recurrence == pm_core::models::WindowRecurrence::Weekly {
match req.recurrence_day {
Some(d) if (0..=6).contains(&d) => {}
_ => {
return Err(err(
StatusCode::BAD_REQUEST,
"bad_request",
"Weekly recurrence requires recurrence_day 0-6 (0=Sunday)",
));
}
}
}
// Validate: monthly requires recurrence_day 1-31
if req.recurrence == pm_core::models::WindowRecurrence::Monthly {
match req.recurrence_day {
Some(d) if (1..=31).contains(&d) => {}
_ => {
return Err(err(
StatusCode::BAD_REQUEST,
"bad_request",
"Monthly recurrence requires recurrence_day 1-31",
));
}
}
}
// Verify host exists.
let host_exists: bool =
sqlx::query_scalar("SELECT EXISTS(SELECT 1 FROM hosts WHERE id = $1)")
.bind(host_id)
.fetch_one(&state.db)
.await
.map_err(|e| {
tracing::error!(error = %e, %host_id, "create_window: host existence check failed");
err(StatusCode::INTERNAL_SERVER_ERROR, "internal_error", "Database error")
})?;
if !host_exists {
return Err(err(StatusCode::NOT_FOUND, "not_found", "Host not found"));
}
let duration = req.duration_minutes.unwrap_or(60);
let enabled = req.enabled.unwrap_or(true);
let window: MaintenanceWindow = sqlx::query_as(
r#"
INSERT INTO maintenance_windows
(host_id, label, recurrence, start_at, duration_minutes, recurrence_day, enabled)
VALUES
($1, $2, $3, $4, $5, $6, $7)
RETURNING id, host_id, label, recurrence, start_at, duration_minutes,
recurrence_day, enabled, created_at, updated_at
"#,
)
.bind(host_id)
.bind(&req.label)
.bind(&req.recurrence)
.bind(req.start_at)
.bind(duration)
.bind(req.recurrence_day)
.bind(enabled)
.fetch_one(&state.db)
.await
.map_err(|e| {
tracing::error!(error = %e, %host_id, "create_window: insert failed");
err(StatusCode::INTERNAL_SERVER_ERROR, "internal_error", "Database error")
})?;
log_event(
&state.db,
AuditAction::MaintenanceWindowCreated,
Some(auth.user_id),
Some(&auth.username),
Some("maintenance_window"),
Some(&window.id.to_string()),
json!({
"host_id": host_id,
"label": window.label,
"recurrence": window.recurrence.to_string(),
}),
None,
None,
)
.await;
tracing::info!(
window_id = %window.id,
%host_id,
recurrence = %window.recurrence,
user = %auth.username,
"Maintenance window created"
);
Ok(Json(json!(window)))
}
// ── PUT /api/v1/hosts/:host_id/maintenance-windows/:win_id ───────────────────
async fn update_window(
State(state): State<AppState>,
auth: AuthUser,
Path((host_id, win_id)): Path<(Uuid, Uuid)>,
Json(req): Json<UpdateMaintenanceWindowRequest>,
) -> Result<Json<Value>, (StatusCode, Json<Value>)> {
// Fetch existing record (verify ownership and existence).
let existing: Option<MaintenanceWindow> = sqlx::query_as(
r#"
SELECT id, host_id, label, recurrence, start_at, duration_minutes,
recurrence_day, enabled, created_at, updated_at
FROM maintenance_windows
WHERE id = $1 AND host_id = $2
"#,
)
.bind(win_id)
.bind(host_id)
.fetch_optional(&state.db)
.await
.map_err(|e| {
tracing::error!(error = %e, %win_id, "update_window: fetch failed");
err(StatusCode::INTERNAL_SERVER_ERROR, "internal_error", "Database error")
})?;
let existing = existing.ok_or_else(|| {
err(StatusCode::NOT_FOUND, "not_found", "Maintenance window not found")
})?;
// Apply partial updates using existing values as defaults.
let new_label = req.label.unwrap_or(existing.label);
let new_recurrence = req.recurrence.unwrap_or(existing.recurrence);
let new_start_at = req.start_at.unwrap_or(existing.start_at);
let new_duration = req.duration_minutes.unwrap_or(existing.duration_minutes);
let new_rec_day = req.recurrence_day.or(existing.recurrence_day);
let new_enabled = req.enabled.unwrap_or(existing.enabled);
// Validate recurrence_day for the final recurrence type.
if new_recurrence == pm_core::models::WindowRecurrence::Weekly {
match new_rec_day {
Some(d) if (0..=6).contains(&d) => {}
_ => {
return Err(err(
StatusCode::BAD_REQUEST,
"bad_request",
"Weekly recurrence requires recurrence_day 0-6",
));
}
}
}
if new_recurrence == pm_core::models::WindowRecurrence::Monthly {
match new_rec_day {
Some(d) if (1..=31).contains(&d) => {}
_ => {
return Err(err(
StatusCode::BAD_REQUEST,
"bad_request",
"Monthly recurrence requires recurrence_day 1-31",
));
}
}
}
let updated: MaintenanceWindow = sqlx::query_as(
r#"
UPDATE maintenance_windows
SET label = $3,
recurrence = $4,
start_at = $5,
duration_minutes = $6,
recurrence_day = $7,
enabled = $8,
updated_at = NOW()
WHERE id = $1 AND host_id = $2
RETURNING id, host_id, label, recurrence, start_at, duration_minutes,
recurrence_day, enabled, created_at, updated_at
"#,
)
.bind(win_id)
.bind(host_id)
.bind(&new_label)
.bind(&new_recurrence)
.bind(new_start_at)
.bind(new_duration)
.bind(new_rec_day)
.bind(new_enabled)
.fetch_one(&state.db)
.await
.map_err(|e| {
tracing::error!(error = %e, %win_id, "update_window: update failed");
err(StatusCode::INTERNAL_SERVER_ERROR, "internal_error", "Database error")
})?;
log_event(
&state.db,
AuditAction::MaintenanceWindowUpdated,
Some(auth.user_id),
Some(&auth.username),
Some("maintenance_window"),
Some(&win_id.to_string()),
json!({ "host_id": host_id }),
None,
None,
)
.await;
tracing::info!(
window_id = %win_id,
%host_id,
user = %auth.username,
"Maintenance window updated"
);
Ok(Json(json!(updated)))
}
// ── DELETE /api/v1/hosts/:host_id/maintenance-windows/:win_id ────────────────
async fn delete_window(
State(state): State<AppState>,
auth: AuthUser,
Path((host_id, win_id)): Path<(Uuid, Uuid)>,
) -> Result<Json<Value>, (StatusCode, Json<Value>)> {
let result = sqlx::query(
"DELETE FROM maintenance_windows WHERE id = $1 AND host_id = $2",
)
.bind(win_id)
.bind(host_id)
.execute(&state.db)
.await
.map_err(|e| {
tracing::error!(error = %e, %win_id, "delete_window: delete failed");
err(StatusCode::INTERNAL_SERVER_ERROR, "internal_error", "Database error")
})?;
if result.rows_affected() == 0 {
return Err(err(
StatusCode::NOT_FOUND,
"not_found",
"Maintenance window not found",
));
}
log_event(
&state.db,
AuditAction::MaintenanceWindowDeleted,
Some(auth.user_id),
Some(&auth.username),
Some("maintenance_window"),
Some(&win_id.to_string()),
json!({ "host_id": host_id }),
None,
None,
)
.await;
tracing::info!(
window_id = %win_id,
%host_id,
user = %auth.username,
"Maintenance window deleted"
);
Ok(Json(json!({ "message": "Maintenance window deleted" })))
}

View File

@ -3,6 +3,8 @@ pub mod auth;
pub mod discovery;
pub mod groups;
pub mod hosts;
pub mod maintenance_windows;
pub mod jobs;
pub mod status;
pub mod users;
pub mod ws;

View File

@ -0,0 +1,212 @@
//! WebSocket relay routes — M7
//!
//! POST /api/v1/ws/ticket — create a single-use WS auth ticket (JWT-protected)
//! GET /api/v1/ws/jobs — browser WebSocket endpoint (ticket-authenticated)
use axum::{
extract::{Query, State, WebSocketUpgrade},
extract::ws::{Message, WebSocket},
http::StatusCode,
response::{Json, Response},
routing::{get, post},
Router,
};
use chrono::{Duration, Utc};
use pm_auth::rbac::AuthUser;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use sqlx::postgres::PgListener;
use ulid::Ulid;
use uuid::Uuid;
use crate::AppState;
// ── WsTicket ──────────────────────────────────────────────────────────────────
/// Single-use WebSocket authentication ticket stored in-memory.
#[derive(Debug, Clone)]
pub struct WsTicket {
pub user_id: Uuid,
pub role: String,
pub expires_at: chrono::DateTime<Utc>,
}
// ── Router ────────────────────────────────────────────────────────────────────
/// Router for ticket-issuance endpoint (JWT-protected, merged into protected_api).
pub fn ticket_router() -> Router<AppState> {
Router::new().route("/ws/ticket", post(create_ticket_handler))
}
/// Router for the WebSocket endpoint (ticket-authenticated, NO JWT middleware).
pub fn ws_router() -> Router<AppState> {
Router::new().route("/api/v1/ws/jobs", get(ws_handler))
}
// ── Error helper ─────────────────────────────────────────────────────────────
#[inline]
fn err(
status: StatusCode,
code: &'static str,
message: impl Into<String>,
) -> (StatusCode, Json<Value>) {
(
status,
Json(json!({ "error": { "code": code, "message": message.into() } })),
)
}
// ── POST /api/v1/ws/ticket ────────────────────────────────────────────────────
/// Issue a single-use WebSocket authentication ticket (60 s expiry).
pub async fn create_ticket_handler(
State(state): State<AppState>,
auth: AuthUser,
) -> Result<Json<Value>, (StatusCode, Json<Value>)> {
let ticket_id = Ulid::new().to_string();
let expires_at = Utc::now() + Duration::seconds(60);
let ticket = WsTicket {
user_id: auth.user_id,
role: auth.role.as_str().to_string(),
expires_at,
};
state.ws_tickets.insert(ticket_id.clone(), ticket);
tracing::info!(
user_id = %auth.user_id,
username = %auth.username,
ticket = %ticket_id,
"WS ticket issued"
);
Ok(Json(json!({ "ticket": ticket_id })))
}
// ── GET /api/v1/ws/jobs ───────────────────────────────────────────────────────
#[derive(Debug, Deserialize)]
pub struct WsQuery {
pub ticket: String,
}
/// Browser WebSocket upgrade endpoint — authenticates via single-use ticket.
pub async fn ws_handler(
State(state): State<AppState>,
Query(q): Query<WsQuery>,
ws: WebSocketUpgrade,
) -> Result<Response, (StatusCode, Json<Value>)> {
// Validate and consume the ticket atomically.
let ticket = {
let entry = state.ws_tickets.get(&q.ticket);
match entry {
None => {
return Err(err(
StatusCode::UNAUTHORIZED,
"invalid_ticket",
"WebSocket ticket not found or already used",
));
}
Some(t) => {
if t.expires_at < Utc::now() {
drop(t);
state.ws_tickets.remove(&q.ticket);
return Err(err(
StatusCode::UNAUTHORIZED,
"ticket_expired",
"WebSocket ticket has expired",
));
}
t.clone()
}
}
};
// Single-use: remove immediately after validation.
state.ws_tickets.remove(&q.ticket);
tracing::info!(
user_id = %ticket.user_id,
role = %ticket.role,
"Browser WebSocket connection upgraded"
);
let db = state.db.clone();
Ok(ws.on_upgrade(move |socket| handle_browser_ws(socket, db, ticket)))
}
// ── WebSocket handler ─────────────────────────────────────────────────────────
/// Drive the browser WebSocket: LISTEN on `job_update` and forward payloads.
async fn handle_browser_ws(
mut socket: WebSocket,
db: sqlx::PgPool,
ticket: WsTicket,
) {
// Acquire a dedicated PG listener connection.
let mut listener = match PgListener::connect_with(&db).await {
Ok(l) => l,
Err(e) => {
tracing::error!(error = %e, user_id = %ticket.user_id, "Failed to create PgListener");
let _ = socket
.send(Message::Text(
json!({ "error": "internal_error" }).to_string().into(),
))
.await;
return;
}
};
if let Err(e) = listener.listen("job_update").await {
tracing::error!(error = %e, user_id = %ticket.user_id, "PgListener LISTEN failed");
return;
}
tracing::info!(user_id = %ticket.user_id, "Browser WS: LISTEN job_update started");
loop {
tokio::select! {
// Forward PG notifications to the browser.
notify_result = listener.recv() => {
match notify_result {
Ok(notification) => {
let payload = notification.payload().to_string();
tracing::debug!(user_id = %ticket.user_id, payload = %payload, "Forwarding job_update");
if socket.send(Message::Text(payload.into())).await.is_err() {
tracing::info!(user_id = %ticket.user_id, "Browser WS send failed — client disconnected");
break;
}
}
Err(e) => {
tracing::error!(error = %e, user_id = %ticket.user_id, "PgListener recv error");
break;
}
}
}
// Handle incoming frames from the browser (ping/close).
msg = socket.recv() => {
match msg {
Some(Ok(Message::Close(_))) | None => {
tracing::info!(user_id = %ticket.user_id, "Browser WS closed by client");
break;
}
Some(Ok(Message::Ping(data))) => {
if socket.send(Message::Pong(data)).await.is_err() {
break;
}
}
Some(Err(e)) => {
tracing::debug!(error = %e, user_id = %ticket.user_id, "Browser WS recv error");
break;
}
_ => {}
}
}
}
}
tracing::info!(user_id = %ticket.user_id, "Browser WS handler exiting");
}