Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 87601fe510 | |||
| 76c26aa379 | |||
| 8ca616a02c | |||
| 8b6d9ed861 | |||
| c44045db38 | |||
| 76ce246893 | |||
| 6ba708abb1 |
BIN
.a0proj/audit.db
BIN
.a0proj/audit.db
Binary file not shown.
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -1859,7 +1859,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "linux-patch-api"
|
||||
version = "0.3.2"
|
||||
version = "0.3.6"
|
||||
dependencies = [
|
||||
"actix",
|
||||
"actix-rt",
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "linux-patch-api"
|
||||
version = "0.3.4"
|
||||
version = "0.3.7"
|
||||
edition = "2021"
|
||||
authors = ["Echo <echo@moon-dragon.us>"]
|
||||
description = "Secure remote package management API for Linux systems"
|
||||
|
||||
@ -17,16 +17,17 @@ RuntimeDirectory=linux-patch-api
|
||||
RuntimeDirectoryMode=0755
|
||||
|
||||
# Security hardening
|
||||
# Allow reboot capability for scheduled reboots
|
||||
CapabilityBoundingSet=CAP_SYS_BOOT
|
||||
AmbientCapabilities=CAP_SYS_BOOT
|
||||
# ProtectSystem removed - package management requires write access to /usr, /etc, /lib
|
||||
# Network security provided by mTLS + IP whitelist
|
||||
# NOTE: Package management requires extensive system access. The following
|
||||
# restrictions have been removed because they block core functionality:
|
||||
# - ProtectSystem=strict: Blocks writes to /usr, /etc, /lib where packages install
|
||||
# - NoNewPrivileges: Blocks sudo/setuid which apt needs for _apt sandbox
|
||||
# - RestrictSUIDSGID: Blocks setuid/setgid which apt needs for _apt sandbox
|
||||
# - CapabilityBoundingSet: Drops capabilities that apt needs (SETUID, SETGID, CHOWN, etc.)
|
||||
# - AmbientCapabilities: Same issue as CapabilityBoundingSet
|
||||
# Network security is provided by mTLS + IP whitelist. The service runs as root
|
||||
# and MUST be able to install/remove/update packages system-wide.
|
||||
ProtectHome=true
|
||||
# ReadWritePaths kept as documentation reference for apt/dpkg paths
|
||||
ReadWritePaths=/var/lib/linux_patch_api /var/log/linux_patch_api /var/cache/apt /var/lib/apt /var/lib/dpkg /var/log/apt
|
||||
PrivateTmp=true
|
||||
PrivateDevices=true
|
||||
ProtectHostname=true
|
||||
ProtectClock=true
|
||||
ProtectKernelTunables=true
|
||||
@ -36,8 +37,6 @@ RestrictNamespaces=true
|
||||
LockPersonality=true
|
||||
MemoryDenyWriteExecute=false
|
||||
RestrictRealtime=true
|
||||
# RestrictSUIDSGID removed - package management requires setuid/setgid for apt/dpkg
|
||||
RemoveIPC=true
|
||||
|
||||
# System call filtering (whitelist approach)
|
||||
SystemCallFilter=@system-service
|
||||
|
||||
88
debian/changelog
vendored
88
debian/changelog
vendored
@ -1,58 +1,54 @@
|
||||
linux-patch-api (0.3.7-1) unstable; urgency=low
|
||||
|
||||
* Add GET /api/v1/system/services/{name} endpoint for service health checks
|
||||
* Add ServiceStatus struct with systemd and OpenRC support
|
||||
* Add get_service_status() to PackageManagerBackend trait
|
||||
* Implement systemd service status via systemctl
|
||||
* Implement OpenRC service status via rc-service
|
||||
* Add E2E test for service status endpoint
|
||||
|
||||
-- Echo <echo@moon-dragon.us> Mon, 04 May 2026 23:44:00 -0500
|
||||
|
||||
linux-patch-api (0.3.5-1) unstable; urgency=low
|
||||
|
||||
* Remove CapabilityBoundingSet and AmbientCapabilities - apt needs full root capabilities
|
||||
* Remove ProtectSystem=strict, NoNewPrivileges, RestrictSUIDSGID - block core functionality
|
||||
* Remove ReadWritePaths - unnecessary without ProtectSystem=strict
|
||||
* Fix E2E test: properly FAIL on status=failed package operations
|
||||
* Fix E2E test: require status=completed for install/update/remove lifecycle
|
||||
* Update service file Type=notify -> Type=simple
|
||||
* Add DEBIAN_FRONTEND=noninteractive environment variable
|
||||
|
||||
-- Echo <echo@moon-dragon.us> Sat, 03 May 2026 03:15:00 -0500
|
||||
|
||||
linux-patch-api (0.3.4-1) unstable; urgency=low
|
||||
|
||||
* Fix CI workflow: prevent recursive tag triggers (v* -> v*.*.*)
|
||||
* Fix CI workflow: upload u2204 deb to same release (no -u2204 suffix)
|
||||
* Remove sudo from apt commands (service runs as root)
|
||||
* Remove NoNewPrivileges and RestrictSUIDSGID from systemd service
|
||||
* Fix dpkg packaging: remove linux-patch-api user creation
|
||||
* Remove NoNewPrivileges and RestrictSUIDSGID from service file
|
||||
* Update service file Type=notify -> Type=simple
|
||||
* Add DEBIAN_FRONTEND=noninteractive environment variable
|
||||
|
||||
-- Echo <echo@moon-dragon.us> Fri, 02 May 2026 22:00:00 -0500
|
||||
|
||||
-- Echo <echo@moon-dragon.us> Sat, 03 May 2026 03:15:00 -0500
|
||||
linux-patch-api (0.3.3-1) unstable; urgency=low
|
||||
|
||||
* Fix dpkg packaging: Remove linux-patch-api user creation, fix directory ownership
|
||||
* Fix package install: Remove sudo from apt commands (service runs as root)
|
||||
* Remove NoNewPrivileges and RestrictSUIDSGID from systemd service
|
||||
* Fix dpkg packaging: remove linux-patch-api user creation
|
||||
* Change ownership to root:root in preinst/postinst scripts
|
||||
* Bump version to 0.3.3
|
||||
|
||||
-- Echo <echo@moon-dragon.us> Fri, 02 May 2026 21:45:00 -0500
|
||||
|
||||
-- Echo <echo@moon-dragon.us> Sat, 03 May 2026 02:30:00 -0500
|
||||
linux-patch-api (0.3.2-1) unstable; urgency=low
|
||||
|
||||
* Fix package install: Remove sudo from apt commands (service runs as root)
|
||||
* Fix reboot endpoint: Implement actual system reboot via shutdown/systemctl
|
||||
* Fix patches handler: Call reboot_system() instead of just logging
|
||||
* Remove NoNewPrivileges and RestrictSUIDSGID from systemd service
|
||||
* Add CAP_SYS_BOOT capability to systemd service for LXC reboot support
|
||||
* Fix dpkg packaging: Remove linux-patch-api user creation, fix directory ownership
|
||||
* Remove sudo from apt commands in source code
|
||||
* Remove NoNewPrivileges=true from service file
|
||||
* Remove RestrictSUIDSGID=true from service file
|
||||
* Add DEBIAN_FRONTEND=noninteractive to service file
|
||||
* Fix TLS 1.3 enforcement in mtls.rs
|
||||
* Add client_disconnect_timeout to main.rs
|
||||
* Optimize RwLock usage in jobs/manager.rs
|
||||
* Bump version to 0.3.2
|
||||
|
||||
-- Echo <echo@moon-dragon.us> Sat, 02 May 2026 21:25:00 -0500
|
||||
linux-patch-api (0.3.1-1) unstable; urgency=low
|
||||
|
||||
* Fix reboot endpoint: Implement actual system reboot via shutdown/systemctl
|
||||
* Fix patches handler: Call reboot_system() instead of just logging
|
||||
* Add CAP_SYS_BOOT capability to systemd service for LXC reboot support
|
||||
* Remove unused warn import
|
||||
|
||||
-- Echo <echo@moon-dragon.us> Sat, 02 May 2026 20:37:00 -0500
|
||||
linux-patch-api (0.3.0-1) unstable; urgency=low
|
||||
|
||||
* v0.3.0 beta release
|
||||
* Fix List Jobs connection reset: Add client_disconnect_timeout (5s)
|
||||
* Enforce TLS 1.3 only with builder_with_provider()
|
||||
* Fix RwLock contention: Release read lock before sorting in list_jobs()
|
||||
* Fix systemd service: Remove ProtectSystem=strict
|
||||
* Fix systemd service: Change Type=notify to Type=simple
|
||||
* Fix systemd service: Add DEBIAN_FRONTEND=noninteractive
|
||||
* Add Ubuntu 22.04 CI build job
|
||||
* Add apt-get -f install for broken runner deps
|
||||
|
||||
-- Echo <echo@moon-dragon.us> Sat, 02 May 2026 19:55:00 -0500
|
||||
linux-patch-api (1.0.0-1) stable; urgency=medium
|
||||
|
||||
* Initial production release
|
||||
* Secure mTLS-authenticated REST API for remote package management
|
||||
* 15 API endpoints for package install/remove, patch application, system management
|
||||
* Asynchronous job processing with WebSocket status streaming
|
||||
* IP whitelist enforcement and comprehensive audit logging
|
||||
* Systemd integration with security hardening
|
||||
* Supports Debian 11/12, Ubuntu 20.04/22.04/24.04
|
||||
|
||||
-- Echo <echo@moon-dragon.us> Thu, 09 Apr 2026 18:57:12 -0500
|
||||
-- Echo <echo@moon-dragon.us> Fri, 02 May 2026 21:30:00 -0500
|
||||
|
||||
@ -5,7 +5,8 @@ After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=notify
|
||||
Type=simple
|
||||
NotifyAccess=all
|
||||
ExecStart=/usr/bin/linux-patch-api --config /etc/linux_patch_api/config.yaml
|
||||
Restart=on-failure
|
||||
RestartSec=5s
|
||||
@ -16,12 +17,17 @@ RuntimeDirectory=linux-patch-api
|
||||
RuntimeDirectoryMode=0755
|
||||
|
||||
# Security hardening
|
||||
NoNewPrivileges=true
|
||||
ProtectSystem=strict
|
||||
# NOTE: Package management requires extensive system access. The following
|
||||
# restrictions have been removed because they block core functionality:
|
||||
# - ProtectSystem=strict: Blocks writes to /usr, /etc, /lib where packages install
|
||||
# - NoNewPrivileges: Blocks sudo/setuid which apt needs for _apt sandbox
|
||||
# - RestrictSUIDSGID: Blocks setuid/setgid which apt needs for _apt sandbox
|
||||
# - CapabilityBoundingSet: Drops capabilities that apt needs (SETUID, SETGID, CHOWN, etc.)
|
||||
# - AmbientCapabilities: Same issue as CapabilityBoundingSet
|
||||
# Network security is provided by mTLS + IP whitelist. The service runs as root
|
||||
# and MUST be able to install/remove/update packages system-wide.
|
||||
ProtectHome=true
|
||||
ReadWritePaths=/var/lib/linux_patch_api /var/log/linux_patch_api
|
||||
PrivateTmp=true
|
||||
PrivateDevices=true
|
||||
ProtectHostname=true
|
||||
ProtectClock=true
|
||||
ProtectKernelTunables=true
|
||||
@ -31,8 +37,6 @@ RestrictNamespaces=true
|
||||
LockPersonality=true
|
||||
MemoryDenyWriteExecute=false
|
||||
RestrictRealtime=true
|
||||
RestrictSUIDSGID=true
|
||||
RemoveIPC=true
|
||||
|
||||
# System call filtering (whitelist approach)
|
||||
SystemCallFilter=@system-service
|
||||
@ -40,6 +44,7 @@ SystemCallErrorNumber=EPERM
|
||||
|
||||
# Environment
|
||||
Environment="RUST_BACKTRACE=1"
|
||||
Environment="DEBIAN_FRONTEND=noninteractive"
|
||||
Environment="RUST_LOG=info"
|
||||
|
||||
# Logging
|
||||
|
||||
@ -15,4 +15,5 @@ pub mod websocket;
|
||||
|
||||
// Re-export commonly used types
|
||||
pub use packages::{ApiError, ApiResponse};
|
||||
pub use websocket::{WsClientMessage, WsServerMessage};
|
||||
// WebSocket message types are now in crate::jobs::websocket
|
||||
pub use crate::jobs::websocket::{WsClientMessage, WsServerMessage};
|
||||
|
||||
@ -47,6 +47,19 @@ pub struct HealthData {
|
||||
pub version: String,
|
||||
}
|
||||
|
||||
/// Service status response data
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct ServiceStatusData {
|
||||
pub name: String,
|
||||
pub display_name: String,
|
||||
pub active_state: String,
|
||||
pub sub_state: String,
|
||||
pub load_state: String,
|
||||
pub enabled_state: String,
|
||||
pub main_pid: Option<u32>,
|
||||
pub healthy: bool,
|
||||
}
|
||||
|
||||
/// Reboot request
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct RebootRequest {
|
||||
@ -228,12 +241,80 @@ pub async fn reboot_system(
|
||||
}
|
||||
}
|
||||
|
||||
/// Get service status
|
||||
pub async fn get_service_status(
|
||||
path: web::Path<String>,
|
||||
backend: web::Data<Box<dyn PackageManagerBackend>>,
|
||||
_req: HttpRequest,
|
||||
) -> impl Responder {
|
||||
let request_id = Uuid::new_v4().to_string();
|
||||
let service_name = path.into_inner();
|
||||
|
||||
info!(
|
||||
request_id = %request_id,
|
||||
service = %service_name,
|
||||
"Getting service status"
|
||||
);
|
||||
|
||||
// Validate service name
|
||||
if service_name.is_empty() || service_name.contains('/') || service_name.contains("..") {
|
||||
let response = ApiResponse::<()>::error(
|
||||
"INVALID_SERVICE_NAME",
|
||||
&format!("Invalid service name: {}", service_name),
|
||||
None,
|
||||
false,
|
||||
);
|
||||
return HttpResponse::BadRequest().json(response);
|
||||
}
|
||||
|
||||
match backend.get_service_status(&service_name) {
|
||||
Ok(Some(status)) => {
|
||||
let response = ApiResponse::success(ServiceStatusData {
|
||||
name: status.name,
|
||||
display_name: status.display_name,
|
||||
active_state: status.active_state,
|
||||
sub_state: status.sub_state,
|
||||
load_state: status.load_state,
|
||||
enabled_state: status.enabled_state,
|
||||
main_pid: status.main_pid,
|
||||
healthy: status.healthy,
|
||||
});
|
||||
HttpResponse::Ok().json(response)
|
||||
}
|
||||
Ok(None) => {
|
||||
let response = ApiResponse::<()>::error(
|
||||
"SERVICE_NOT_FOUND",
|
||||
&format!("Service '{}' not found", service_name),
|
||||
None,
|
||||
false,
|
||||
);
|
||||
HttpResponse::NotFound().json(response)
|
||||
}
|
||||
Err(e) => {
|
||||
error!(
|
||||
request_id = %request_id,
|
||||
service = %service_name,
|
||||
error = %e,
|
||||
"Failed to get service status"
|
||||
);
|
||||
let response = ApiResponse::<()>::error(
|
||||
"SERVICE_STATUS_ERROR",
|
||||
&format!("Failed to get service status: {}", e),
|
||||
None,
|
||||
true,
|
||||
);
|
||||
HttpResponse::InternalServerError().json(response)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Configure routes for system endpoints
|
||||
pub fn configure_routes(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(
|
||||
web::scope("/system")
|
||||
.route("/info", web::get().to(get_system_info))
|
||||
.route("/reboot", web::post().to(reboot_system)),
|
||||
.route("/reboot", web::post().to(reboot_system))
|
||||
.route("/services/{name}", web::get().to(get_service_status)),
|
||||
)
|
||||
.route("/health", web::get().to(health_check));
|
||||
}
|
||||
|
||||
@ -3,128 +3,34 @@
|
||||
//! Implements WebSocket endpoint for real-time job status updates:
|
||||
//! - WS /api/v1/ws/jobs - Real-time job status streaming
|
||||
//!
|
||||
//! Note: Full WebSocket implementation requires actix-web-actors compatibility.
|
||||
//! This stub provides the endpoint structure for future enhancement.
|
||||
//! Uses actix-web-actors for proper WebSocket handshake and protocol handling.
|
||||
//! The actual actor logic lives in crate::jobs::websocket::WsJobActor.
|
||||
|
||||
use actix_web::{http::StatusCode, web, Error, HttpRequest, HttpResponse};
|
||||
use chrono::Utc;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use actix_web::{web, Error, HttpRequest, HttpResponse};
|
||||
use tracing::info;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::jobs::manager::JobManager;
|
||||
|
||||
/// WebSocket message from client
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
#[serde(tag = "action")]
|
||||
pub enum WsClientMessage {
|
||||
#[serde(rename = "subscribe")]
|
||||
Subscribe {
|
||||
#[serde(default)]
|
||||
job_id: Option<String>,
|
||||
},
|
||||
#[serde(rename = "unsubscribe")]
|
||||
Unsubscribe { job_id: String },
|
||||
}
|
||||
|
||||
/// WebSocket message to client
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
pub struct WsServerMessage {
|
||||
pub event: String,
|
||||
pub job_id: String,
|
||||
pub status: String,
|
||||
pub progress: u8,
|
||||
pub message: String,
|
||||
pub timestamp: String,
|
||||
}
|
||||
|
||||
impl WsServerMessage {
|
||||
pub fn job_status(job_id: &str, status: &str, progress: u8, message: &str) -> Self {
|
||||
Self {
|
||||
event: "job_status".to_string(),
|
||||
job_id: job_id.to_string(),
|
||||
status: status.to_string(),
|
||||
progress,
|
||||
message: message.to_string(),
|
||||
timestamp: Utc::now().to_rfc3339(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn job_complete(job_id: &str, status: &str, message: &str) -> Self {
|
||||
Self {
|
||||
event: "job_complete".to_string(),
|
||||
job_id: job_id.to_string(),
|
||||
status: status.to_string(),
|
||||
progress: 100,
|
||||
message: message.to_string(),
|
||||
timestamp: Utc::now().to_rfc3339(),
|
||||
}
|
||||
}
|
||||
}
|
||||
use crate::jobs::websocket::WsJobActor;
|
||||
|
||||
/// Handle WebSocket connection request
|
||||
/// Returns upgrade response for WebSocket handshake
|
||||
/// Performs the WebSocket handshake and spawns a WsJobActor
|
||||
/// that streams job status events to the connected client.
|
||||
pub async fn websocket_handler(
|
||||
req: HttpRequest,
|
||||
_job_manager: web::Data<JobManager>,
|
||||
stream: web::Payload,
|
||||
job_manager: web::Data<JobManager>,
|
||||
) -> Result<HttpResponse, Error> {
|
||||
let ws_id = Uuid::new_v4();
|
||||
info!(ws_id = %ws_id, "WebSocket connection request");
|
||||
info!("WebSocket connection request received");
|
||||
|
||||
// Check if this is a WebSocket upgrade request
|
||||
if req
|
||||
.headers()
|
||||
.get("upgrade")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.map(|v| v.eq_ignore_ascii_case("websocket"))
|
||||
.unwrap_or(false)
|
||||
{
|
||||
// WebSocket upgrade requested
|
||||
// In full implementation, this would use actix-web-actors::ws::start()
|
||||
// For now, return a response indicating WebSocket support
|
||||
// Subscribe to job status events from the JobManager broadcast channel
|
||||
let event_rx = job_manager.subscribe();
|
||||
|
||||
let response_msg = serde_json::json!({
|
||||
"event": "connected",
|
||||
"ws_id": ws_id.to_string(),
|
||||
"timestamp": Utc::now().to_rfc3339(),
|
||||
"message": "WebSocket endpoint ready. Full implementation requires actix-web-actors compatibility.",
|
||||
"polling_alternative": "Use GET /api/v1/jobs/{id} for job status polling"
|
||||
});
|
||||
// Create the WebSocket actor with the broadcast receiver
|
||||
let actor = WsJobActor::new(event_rx);
|
||||
|
||||
// Return HTTP 101 Switching Protocols for WebSocket upgrade
|
||||
// In production, this would be handled by actix-web-actors
|
||||
Ok(HttpResponse::build(StatusCode::SWITCHING_PROTOCOLS)
|
||||
.insert_header(("upgrade", "websocket"))
|
||||
.insert_header(("connection", "upgrade"))
|
||||
.json(response_msg))
|
||||
} else {
|
||||
// Not a WebSocket request - return info about the endpoint
|
||||
let info_msg = serde_json::json!({
|
||||
"endpoint": "/api/v1/ws/jobs",
|
||||
"method": "GET",
|
||||
"upgrade_required": "websocket",
|
||||
"headers": {
|
||||
"upgrade": "websocket",
|
||||
"connection": "Upgrade",
|
||||
"sec-websocket-key": "<base64-key>",
|
||||
"sec-websocket-version": "13"
|
||||
},
|
||||
"alternative": "Use GET /api/v1/jobs/{id} for job status polling"
|
||||
});
|
||||
|
||||
Ok(HttpResponse::Ok().json(info_msg))
|
||||
}
|
||||
}
|
||||
|
||||
/// Broadcast job status update to subscribed WebSocket clients
|
||||
pub async fn broadcast_job_update(
|
||||
job_id: &Uuid,
|
||||
status: &crate::jobs::manager::JobStatus,
|
||||
progress: u8,
|
||||
_message: &str,
|
||||
) {
|
||||
info!(job_id = %job_id, status = ?status, progress = progress, "Job status update available for broadcast");
|
||||
// In production, would use a broadcast channel to notify all subscribed WebSocket clients
|
||||
// Perform the WebSocket handshake and start the actor
|
||||
// This computes the proper Sec-WebSocket-Accept header and upgrades the connection
|
||||
actix_web_actors::ws::start(actor, &req, stream)
|
||||
}
|
||||
|
||||
/// Configure WebSocket route
|
||||
@ -134,7 +40,7 @@ pub fn configure_routes(cfg: &mut web::ServiceConfig) {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::jobs::websocket::{WsClientMessage, WsServerMessage};
|
||||
|
||||
#[test]
|
||||
fn test_ws_server_message_serialization() {
|
||||
|
||||
@ -1,13 +1,14 @@
|
||||
//! Job Manager - Async job queue management
|
||||
//!
|
||||
//! Manages async job execution with concurrency limits and timeout enforcement.
|
||||
//! Broadcasts job status events via tokio broadcast channel for WebSocket streaming.
|
||||
|
||||
use anyhow::Result;
|
||||
use chrono::{DateTime, Utc};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio::sync::{broadcast, RwLock};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Job status
|
||||
@ -21,6 +22,20 @@ pub enum JobStatus {
|
||||
TimedOut,
|
||||
}
|
||||
|
||||
/// Convert JobStatus to lowercase string for WebSocket events
|
||||
impl JobStatus {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
JobStatus::Pending => "pending",
|
||||
JobStatus::Running => "running",
|
||||
JobStatus::Completed => "completed",
|
||||
JobStatus::Failed => "failed",
|
||||
JobStatus::Cancelled => "cancelled",
|
||||
JobStatus::TimedOut => "timed_out",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Job operation type
|
||||
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
||||
pub enum JobOperation {
|
||||
@ -110,20 +125,35 @@ impl Job {
|
||||
}
|
||||
}
|
||||
|
||||
/// Job Manager - handles async job queue with limits
|
||||
/// Job status event broadcast to WebSocket clients
|
||||
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
||||
pub struct JobStatusEvent {
|
||||
pub event: String,
|
||||
pub job_id: Uuid,
|
||||
pub status: String,
|
||||
pub progress: u8,
|
||||
pub message: String,
|
||||
pub timestamp: String,
|
||||
}
|
||||
|
||||
/// Job Manager - handles async job queue with limits and WebSocket broadcast
|
||||
pub struct JobManager {
|
||||
max_concurrent: usize,
|
||||
timeout_minutes: u64,
|
||||
jobs: Arc<RwLock<HashMap<Uuid, Job>>>,
|
||||
/// Broadcast sender for job status events
|
||||
event_sender: broadcast::Sender<JobStatusEvent>,
|
||||
}
|
||||
|
||||
impl JobManager {
|
||||
/// Create a new job manager
|
||||
pub fn new(max_concurrent: usize, timeout_minutes: u64) -> Result<Self> {
|
||||
let (event_sender, _) = broadcast::channel(256);
|
||||
Ok(Self {
|
||||
max_concurrent,
|
||||
timeout_minutes,
|
||||
jobs: Arc::new(RwLock::new(HashMap::new())),
|
||||
event_sender,
|
||||
})
|
||||
}
|
||||
|
||||
@ -137,13 +167,46 @@ impl JobManager {
|
||||
self.max_concurrent
|
||||
}
|
||||
|
||||
/// Subscribe to job status events
|
||||
/// Returns a broadcast receiver that will receive JobStatusEvent messages
|
||||
pub fn subscribe(&self) -> broadcast::Receiver<JobStatusEvent> {
|
||||
self.event_sender.subscribe()
|
||||
}
|
||||
|
||||
/// Emit a job status event to all subscribers
|
||||
fn emit_event(
|
||||
&self,
|
||||
event_type: &str,
|
||||
job_id: &Uuid,
|
||||
status: &JobStatus,
|
||||
progress: u8,
|
||||
message: &str,
|
||||
) {
|
||||
let event = JobStatusEvent {
|
||||
event: event_type.to_string(),
|
||||
job_id: *job_id,
|
||||
status: status.as_str().to_string(),
|
||||
progress,
|
||||
message: message.to_string(),
|
||||
timestamp: Utc::now().to_rfc3339(),
|
||||
};
|
||||
// Ignore send errors (no receivers is fine)
|
||||
let _ = self.event_sender.send(event);
|
||||
}
|
||||
|
||||
/// Create a new job and return its ID
|
||||
pub async fn create_job(&self, operation: JobOperation, packages: Vec<String>) -> Result<Uuid> {
|
||||
let job = Job::new(operation, packages);
|
||||
let job_id = job.id;
|
||||
let status = job.status.clone();
|
||||
let progress = job.progress;
|
||||
let message = job.message.clone();
|
||||
|
||||
let mut jobs = self.jobs.write().await;
|
||||
jobs.insert(job_id, job);
|
||||
drop(jobs); // Release lock before emitting event
|
||||
|
||||
self.emit_event("job_status", &job_id, &status, progress, &message);
|
||||
|
||||
Ok(job_id)
|
||||
}
|
||||
@ -162,17 +225,28 @@ impl JobManager {
|
||||
progress: Option<u8>,
|
||||
message: Option<String>,
|
||||
) -> Result<()> {
|
||||
let mut jobs = self.jobs.write().await;
|
||||
let event_data;
|
||||
{
|
||||
let mut jobs = self.jobs.write().await;
|
||||
|
||||
if let Some(job) = jobs.get_mut(job_id) {
|
||||
job.status = status;
|
||||
if let Some(p) = progress {
|
||||
job.progress = p;
|
||||
if let Some(job) = jobs.get_mut(job_id) {
|
||||
job.status = status;
|
||||
if let Some(p) = progress {
|
||||
job.progress = p;
|
||||
}
|
||||
if let Some(m) = message {
|
||||
job.message = m;
|
||||
}
|
||||
job.updated_at = Utc::now();
|
||||
|
||||
event_data = Some((job.status.clone(), job.progress, job.message.clone()));
|
||||
} else {
|
||||
event_data = None;
|
||||
}
|
||||
if let Some(m) = message {
|
||||
job.message = m;
|
||||
}
|
||||
job.updated_at = Utc::now();
|
||||
} // Write lock dropped here
|
||||
|
||||
if let Some((status, progress, message)) = event_data {
|
||||
self.emit_event("job_status", job_id, &status, progress, &message);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@ -191,10 +265,20 @@ impl JobManager {
|
||||
|
||||
/// Mark a job as completed
|
||||
pub async fn complete_job(&self, job_id: &Uuid) -> Result<()> {
|
||||
let mut jobs = self.jobs.write().await;
|
||||
let event_data;
|
||||
{
|
||||
let mut jobs = self.jobs.write().await;
|
||||
|
||||
if let Some(job) = jobs.get_mut(job_id) {
|
||||
job.complete();
|
||||
if let Some(job) = jobs.get_mut(job_id) {
|
||||
job.complete();
|
||||
event_data = Some((job.status.clone(), job.progress, job.message.clone()));
|
||||
} else {
|
||||
event_data = None;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some((status, progress, message)) = event_data {
|
||||
self.emit_event("job_status", job_id, &status, progress, &message);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@ -202,10 +286,20 @@ impl JobManager {
|
||||
|
||||
/// Mark a job as failed
|
||||
pub async fn fail_job(&self, job_id: &Uuid, error: String) -> Result<()> {
|
||||
let mut jobs = self.jobs.write().await;
|
||||
let event_data;
|
||||
{
|
||||
let mut jobs = self.jobs.write().await;
|
||||
|
||||
if let Some(job) = jobs.get_mut(job_id) {
|
||||
job.fail(error);
|
||||
if let Some(job) = jobs.get_mut(job_id) {
|
||||
job.fail(error);
|
||||
event_data = Some((job.status.clone(), job.progress, job.message.clone()));
|
||||
} else {
|
||||
event_data = None;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some((status, progress, message)) = event_data {
|
||||
self.emit_event("job_status", job_id, &status, progress, &message);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@ -308,6 +402,7 @@ impl Clone for JobManager {
|
||||
max_concurrent: self.max_concurrent,
|
||||
timeout_minutes: self.timeout_minutes,
|
||||
jobs: self.jobs.clone(),
|
||||
event_sender: self.event_sender.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,3 +1,424 @@
|
||||
//! Job WebSocket Handler
|
||||
//! Job WebSocket Actor
|
||||
//!
|
||||
//! Placeholder - implementation in future phases
|
||||
//! Implements real-time WebSocket streaming for job status updates using
|
||||
//! actix-web-actors. Each connected client gets a WsJobActor that:
|
||||
//! - Subscribes to JobManager broadcast channel for job status events
|
||||
//! - Filters events based on client subscribe/unsubscribe messages
|
||||
//! - Forwards matching events as JSON to the WebSocket client
|
||||
//! - Handles ping/pong heartbeat for connection keep-alive
|
||||
//! - Cleans up on disconnect
|
||||
|
||||
use actix::prelude::*;
|
||||
use actix_web_actors::ws;
|
||||
use chrono::Utc;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashSet;
|
||||
use std::time::{Duration, Instant};
|
||||
use tokio::sync::broadcast;
|
||||
use tracing::{debug, error, info, warn};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::manager::JobStatusEvent;
|
||||
|
||||
/// How often heartbeat pings are sent (seconds)
|
||||
const HEARTBEAT_INTERVAL: Duration = Duration::from_secs(5);
|
||||
/// How long before lack of client response causes a disconnect (seconds)
|
||||
const CLIENT_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
|
||||
/// Client-to-server WebSocket message
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
#[serde(tag = "action")]
|
||||
pub enum WsClientMessage {
|
||||
/// Subscribe to events for a specific job, or all jobs if job_id is None
|
||||
#[serde(rename = "subscribe")]
|
||||
Subscribe {
|
||||
#[serde(default)]
|
||||
job_id: Option<String>,
|
||||
},
|
||||
/// Unsubscribe from events for a specific job
|
||||
#[serde(rename = "unsubscribe")]
|
||||
Unsubscribe { job_id: String },
|
||||
}
|
||||
|
||||
/// Server-to-client WebSocket message
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
pub struct WsServerMessage {
|
||||
pub event: String,
|
||||
pub job_id: String,
|
||||
pub status: String,
|
||||
pub progress: u8,
|
||||
pub message: String,
|
||||
pub timestamp: String,
|
||||
}
|
||||
|
||||
impl WsServerMessage {
|
||||
/// Create a job status message from a JobStatusEvent
|
||||
pub fn from_job_status_event(event: &JobStatusEvent) -> Self {
|
||||
Self {
|
||||
event: event.event.clone(),
|
||||
job_id: event.job_id.to_string(),
|
||||
status: event.status.clone(),
|
||||
progress: event.progress,
|
||||
message: event.message.clone(),
|
||||
timestamp: event.timestamp.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a connection established message
|
||||
pub fn connected(ws_id: &Uuid) -> Self {
|
||||
Self {
|
||||
event: "connected".to_string(),
|
||||
job_id: String::new(),
|
||||
status: "connected".to_string(),
|
||||
progress: 0,
|
||||
message: format!("WebSocket connected: {}", ws_id),
|
||||
timestamp: Utc::now().to_rfc3339(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a subscription confirmation message
|
||||
pub fn subscribed(job_id: &Option<String>) -> Self {
|
||||
match job_id {
|
||||
Some(id) => Self {
|
||||
event: "subscribed".to_string(),
|
||||
job_id: id.clone(),
|
||||
status: "subscribed".to_string(),
|
||||
progress: 0,
|
||||
message: format!("Subscribed to job: {}", id),
|
||||
timestamp: Utc::now().to_rfc3339(),
|
||||
},
|
||||
None => Self {
|
||||
event: "subscribed".to_string(),
|
||||
job_id: "all".to_string(),
|
||||
status: "subscribed".to_string(),
|
||||
progress: 0,
|
||||
message: "Subscribed to all job events".to_string(),
|
||||
timestamp: Utc::now().to_rfc3339(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an unsubscription confirmation message
|
||||
pub fn unsubscribed(job_id: &str) -> Self {
|
||||
Self {
|
||||
event: "unsubscribed".to_string(),
|
||||
job_id: job_id.to_string(),
|
||||
status: "unsubscribed".to_string(),
|
||||
progress: 0,
|
||||
message: format!("Unsubscribed from job: {}", job_id),
|
||||
timestamp: Utc::now().to_rfc3339(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an error message
|
||||
pub fn error(code: &str, message: &str) -> Self {
|
||||
Self {
|
||||
event: "error".to_string(),
|
||||
job_id: String::new(),
|
||||
status: code.to_string(),
|
||||
progress: 0,
|
||||
message: message.to_string(),
|
||||
timestamp: Utc::now().to_rfc3339(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a job status message (convenience constructor)
|
||||
pub fn job_status(job_id: &str, status: &str, progress: u8, message: &str) -> Self {
|
||||
Self {
|
||||
event: "job_status".to_string(),
|
||||
job_id: job_id.to_string(),
|
||||
status: status.to_string(),
|
||||
progress,
|
||||
message: message.to_string(),
|
||||
timestamp: Utc::now().to_rfc3339(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Internal message for broadcasting a job status event to the actor
|
||||
#[derive(Message)]
|
||||
#[rtype(result = "()")]
|
||||
pub struct BroadcastEvent(pub JobStatusEvent);
|
||||
|
||||
/// WebSocket actor for streaming job status updates
|
||||
pub struct WsJobActor {
|
||||
/// Unique ID for this WebSocket connection
|
||||
ws_id: Uuid,
|
||||
/// Broadcast receiver for job status events from JobManager
|
||||
event_rx: Option<broadcast::Receiver<JobStatusEvent>>,
|
||||
/// Set of specific job IDs this client is subscribed to
|
||||
subscribed_jobs: HashSet<String>,
|
||||
/// Whether the client is subscribed to all job events
|
||||
subscribed_all: bool,
|
||||
/// Last time we heard from the client (ping/pong or message)
|
||||
last_heartbeat: Instant,
|
||||
/// The actor's own address for the broadcast listener
|
||||
addr: Option<Addr<WsJobActor>>,
|
||||
}
|
||||
|
||||
impl WsJobActor {
|
||||
/// Create a new WebSocket actor with a broadcast receiver
|
||||
pub fn new(event_rx: broadcast::Receiver<JobStatusEvent>) -> Self {
|
||||
Self {
|
||||
ws_id: Uuid::new_v4(),
|
||||
event_rx: Some(event_rx),
|
||||
subscribed_jobs: HashSet::new(),
|
||||
subscribed_all: true, // Default: subscribe to all events
|
||||
last_heartbeat: Instant::now(),
|
||||
addr: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Start the heartbeat check interval
|
||||
fn start_heartbeat(&self, ctx: &mut ws::WebsocketContext<Self>) {
|
||||
ctx.run_interval(HEARTBEAT_INTERVAL, |act, ctx| {
|
||||
if Instant::now().duration_since(act.last_heartbeat) > CLIENT_TIMEOUT {
|
||||
// Heartbeat timed out, disconnect
|
||||
warn!(
|
||||
ws_id = %act.ws_id,
|
||||
"WebSocket heartbeat timeout, disconnecting"
|
||||
);
|
||||
ctx.stop();
|
||||
return;
|
||||
}
|
||||
// Send ping
|
||||
ctx.ping(b"");
|
||||
});
|
||||
}
|
||||
|
||||
/// Start listening to the broadcast channel in a background task
|
||||
fn start_broadcast_listener(&mut self, ctx: &mut <Self as Actor>::Context) {
|
||||
let addr = ctx.address();
|
||||
self.addr = Some(addr.clone());
|
||||
|
||||
// Take ownership of the receiver
|
||||
let mut rx = self.event_rx.take().expect("event_rx already taken");
|
||||
|
||||
// Spawn a task that forwards broadcast events to this actor
|
||||
actix::spawn(async move {
|
||||
loop {
|
||||
match rx.recv().await {
|
||||
Ok(event) => {
|
||||
// Send the event to the actor
|
||||
if addr.try_send(BroadcastEvent(event)).is_err() {
|
||||
// Actor is dead, stop listening
|
||||
break;
|
||||
}
|
||||
}
|
||||
Err(broadcast::error::RecvError::Lagged(n)) => {
|
||||
// We fell behind, but can continue
|
||||
debug!("WebSocket broadcast receiver lagged by {} events", n);
|
||||
}
|
||||
Err(broadcast::error::RecvError::Closed) => {
|
||||
// Channel closed, stop listening
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
impl Actor for WsJobActor {
|
||||
type Context = ws::WebsocketContext<Self>;
|
||||
|
||||
fn started(&mut self, ctx: &mut Self::Context) {
|
||||
info!(ws_id = %self.ws_id, "WebSocket actor started");
|
||||
|
||||
// Start heartbeat monitoring
|
||||
self.start_heartbeat(ctx);
|
||||
|
||||
// Start listening to broadcast events
|
||||
self.start_broadcast_listener(ctx);
|
||||
|
||||
// Send connection established message
|
||||
let msg = WsServerMessage::connected(&self.ws_id);
|
||||
if let Ok(json) = serde_json::to_string(&msg) {
|
||||
ctx.text(json);
|
||||
}
|
||||
}
|
||||
|
||||
fn stopping(&mut self, _ctx: &mut Self::Context) -> Running {
|
||||
info!(ws_id = %self.ws_id, "WebSocket actor stopping");
|
||||
Running::Stop
|
||||
}
|
||||
|
||||
fn stopped(&mut self, _ctx: &mut Self::Context) {
|
||||
info!(ws_id = %self.ws_id, "WebSocket actor stopped");
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle broadcast events from the JobManager channel
|
||||
impl Handler<BroadcastEvent> for WsJobActor {
|
||||
type Result = ();
|
||||
|
||||
fn handle(&mut self, msg: BroadcastEvent, ctx: &mut Self::Context) {
|
||||
let event = msg.0;
|
||||
|
||||
// Check if this client should receive this event
|
||||
let should_forward =
|
||||
self.subscribed_all || self.subscribed_jobs.contains(&event.job_id.to_string());
|
||||
|
||||
if should_forward {
|
||||
let server_msg = WsServerMessage::from_job_status_event(&event);
|
||||
match serde_json::to_string(&server_msg) {
|
||||
Ok(json) => ctx.text(json),
|
||||
Err(e) => {
|
||||
error!(ws_id = %self.ws_id, error = %e, "Failed to serialize job status event");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle WebSocket protocol messages (ping/pong, text, close, etc.)
|
||||
impl StreamHandler<Result<ws::Message, ws::ProtocolError>> for WsJobActor {
|
||||
fn handle(&mut self, msg: Result<ws::Message, ws::ProtocolError>, ctx: &mut Self::Context) {
|
||||
let msg = match msg {
|
||||
Ok(msg) => msg,
|
||||
Err(e) => {
|
||||
error!(ws_id = %self.ws_id, error = %e, "WebSocket protocol error");
|
||||
ctx.stop();
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
match msg {
|
||||
ws::Message::Ping(msg) => {
|
||||
self.last_heartbeat = Instant::now();
|
||||
ctx.pong(&msg);
|
||||
}
|
||||
ws::Message::Pong(_) => {
|
||||
self.last_heartbeat = Instant::now();
|
||||
}
|
||||
ws::Message::Text(text) => {
|
||||
let text = text.to_string();
|
||||
debug!(ws_id = %self.ws_id, text = %text, "Received WebSocket text message");
|
||||
|
||||
// Parse as client message
|
||||
match serde_json::from_str::<WsClientMessage>(&text) {
|
||||
Ok(client_msg) => match client_msg {
|
||||
WsClientMessage::Subscribe { job_id } => match job_id {
|
||||
Some(id) => {
|
||||
self.subscribed_jobs.insert(id.clone());
|
||||
let msg = WsServerMessage::subscribed(&Some(id));
|
||||
if let Ok(json) = serde_json::to_string(&msg) {
|
||||
ctx.text(json);
|
||||
}
|
||||
}
|
||||
None => {
|
||||
self.subscribed_all = true;
|
||||
let msg = WsServerMessage::subscribed(&None);
|
||||
if let Ok(json) = serde_json::to_string(&msg) {
|
||||
ctx.text(json);
|
||||
}
|
||||
}
|
||||
},
|
||||
WsClientMessage::Unsubscribe { job_id } => {
|
||||
self.subscribed_jobs.remove(&job_id);
|
||||
let msg = WsServerMessage::unsubscribed(&job_id);
|
||||
if let Ok(json) = serde_json::to_string(&msg) {
|
||||
ctx.text(json);
|
||||
}
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
warn!(
|
||||
ws_id = %self.ws_id,
|
||||
error = %e,
|
||||
text = %text,
|
||||
"Invalid WebSocket client message"
|
||||
);
|
||||
let msg = WsServerMessage::error(
|
||||
"invalid_message",
|
||||
&format!("Invalid message: {}", e),
|
||||
);
|
||||
if let Ok(json) = serde_json::to_string(&msg) {
|
||||
ctx.text(json);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ws::Message::Binary(_) => {
|
||||
// We don't handle binary messages
|
||||
warn!(ws_id = %self.ws_id, "Received binary message, ignoring");
|
||||
}
|
||||
ws::Message::Close(reason) => {
|
||||
info!(ws_id = %self.ws_id, reason = ?reason, "WebSocket close received");
|
||||
ctx.close(reason);
|
||||
ctx.stop();
|
||||
}
|
||||
ws::Message::Continuation(_) => {
|
||||
// Continuation frames not expected for our use case
|
||||
ctx.stop();
|
||||
}
|
||||
ws::Message::Nop => (),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_ws_server_message_from_event() {
|
||||
let event = JobStatusEvent {
|
||||
event: "job_status".to_string(),
|
||||
job_id: Uuid::new_v4(),
|
||||
status: "running".to_string(),
|
||||
progress: 50,
|
||||
message: "Processing...".to_string(),
|
||||
timestamp: "2026-01-01T00:00:00Z".to_string(),
|
||||
};
|
||||
let msg = WsServerMessage::from_job_status_event(&event);
|
||||
assert_eq!(msg.event, "job_status");
|
||||
assert_eq!(msg.status, "running");
|
||||
assert_eq!(msg.progress, 50);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ws_server_message_serialization() {
|
||||
let msg = WsServerMessage::job_status("test-uuid", "running", 50, "Processing...");
|
||||
let json = serde_json::to_string(&msg).unwrap();
|
||||
assert!(json.contains("job_status"));
|
||||
assert!(json.contains("running"));
|
||||
assert!(json.contains("50"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ws_client_message_subscribe() {
|
||||
let json = r#"{"action": "subscribe", "job_id": "test-uuid"}"#;
|
||||
let msg: WsClientMessage = serde_json::from_str(json).unwrap();
|
||||
match msg {
|
||||
WsClientMessage::Subscribe { job_id } => {
|
||||
assert_eq!(job_id, Some("test-uuid".to_string()));
|
||||
}
|
||||
_ => panic!("Expected Subscribe message"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ws_client_message_subscribe_all() {
|
||||
let json = r#"{"action": "subscribe"}"#;
|
||||
let msg: WsClientMessage = serde_json::from_str(json).unwrap();
|
||||
match msg {
|
||||
WsClientMessage::Subscribe { job_id } => {
|
||||
assert!(job_id.is_none());
|
||||
}
|
||||
_ => panic!("Expected Subscribe message"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ws_client_message_unsubscribe() {
|
||||
let json = r#"{"action": "unsubscribe", "job_id": "test-uuid"}"#;
|
||||
let msg: WsClientMessage = serde_json::from_str(json).unwrap();
|
||||
match msg {
|
||||
WsClientMessage::Unsubscribe { job_id } => {
|
||||
assert_eq!(job_id, "test-uuid");
|
||||
}
|
||||
_ => panic!("Expected Unsubscribe message"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -64,6 +64,19 @@ pub struct SystemInfo {
|
||||
pub pending_reboot: bool,
|
||||
}
|
||||
|
||||
/// Service status information
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ServiceStatus {
|
||||
pub name: String,
|
||||
pub display_name: String,
|
||||
pub active_state: String,
|
||||
pub sub_state: String,
|
||||
pub load_state: String,
|
||||
pub enabled_state: String,
|
||||
pub main_pid: Option<u32>,
|
||||
pub healthy: bool,
|
||||
}
|
||||
|
||||
/// Package manager backend trait
|
||||
pub trait PackageManagerBackend: Send + Sync {
|
||||
fn list_packages(&self, filter: Option<&str>) -> Result<Vec<Package>>;
|
||||
@ -75,6 +88,7 @@ pub trait PackageManagerBackend: Send + Sync {
|
||||
fn apply_patches(&self, packages: Option<&[String]>) -> Result<()>;
|
||||
fn get_system_info(&self) -> Result<SystemInfo>;
|
||||
fn reboot_system(&self, delay_seconds: u64) -> Result<()>;
|
||||
fn get_service_status(&self, name: &str) -> Result<Option<ServiceStatus>>;
|
||||
}
|
||||
|
||||
/// Package specification for installation
|
||||
@ -480,6 +494,151 @@ impl PackageManagerBackend for AptBackend {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_service_status(&self, name: &str) -> Result<Option<ServiceStatus>> {
|
||||
// Validate service name to prevent shell injection
|
||||
if name.is_empty() || name.contains('/') || name.contains("..") {
|
||||
return Err(anyhow::anyhow!("Invalid service name: {}", name));
|
||||
}
|
||||
|
||||
// Determine init system and query accordingly
|
||||
let is_systemd = std::path::Path::new("/run/systemd/system").exists();
|
||||
let is_openrc = std::path::Path::new("/sbin/openrc").exists();
|
||||
|
||||
if is_systemd {
|
||||
get_systemd_service_status(name)
|
||||
} else if is_openrc {
|
||||
get_openrc_service_status(name)
|
||||
} else {
|
||||
Err(anyhow::anyhow!(
|
||||
"No supported init system detected (systemd or OpenRC required)"
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Query systemd service status via systemctl
|
||||
fn get_systemd_service_status(name: &str) -> Result<Option<ServiceStatus>> {
|
||||
let output = Command::new("systemctl")
|
||||
.args([
|
||||
"show",
|
||||
name,
|
||||
"--property=Id,Description,ActiveState,SubState,LoadState,UnitFileState,MainPID",
|
||||
"--no-pager",
|
||||
])
|
||||
.output()
|
||||
.context("Failed to execute systemctl command")?;
|
||||
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
|
||||
// If systemctl returns non-zero or empty output, service doesn't exist
|
||||
if !output.status.success() || stdout.trim().is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let mut id = String::new();
|
||||
let mut description = String::new();
|
||||
let mut active_state = String::new();
|
||||
let mut sub_state = String::new();
|
||||
let mut load_state = String::new();
|
||||
let mut unit_file_state = String::new();
|
||||
let mut main_pid: Option<u32> = None;
|
||||
|
||||
for line in stdout.lines() {
|
||||
if let Some((key, value)) = line.split_once('=') {
|
||||
match key {
|
||||
"Id" => id = value.to_string(),
|
||||
"Description" => description = value.to_string(),
|
||||
"ActiveState" => active_state = value.to_string(),
|
||||
"SubState" => sub_state = value.to_string(),
|
||||
"LoadState" => load_state = value.to_string(),
|
||||
"UnitFileState" => unit_file_state = value.to_string(),
|
||||
"MainPID" => {
|
||||
main_pid = value.parse::<u32>().ok().filter(|&p| p > 0);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If LoadState is not-found or bad-setting, service doesn't exist
|
||||
if load_state == "not-found" || load_state == "bad-setting" || id.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let healthy = active_state == "active" && sub_state == "running";
|
||||
|
||||
Ok(Some(ServiceStatus {
|
||||
name: id,
|
||||
display_name: description,
|
||||
active_state,
|
||||
sub_state,
|
||||
load_state,
|
||||
enabled_state: unit_file_state,
|
||||
main_pid,
|
||||
healthy,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Query OpenRC service status via rc-service
|
||||
fn get_openrc_service_status(name: &str) -> Result<Option<ServiceStatus>> {
|
||||
let output = Command::new("rc-service")
|
||||
.args([name, "status"])
|
||||
.output()
|
||||
.context("Failed to execute rc-service command")?;
|
||||
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
|
||||
// rc-service returns error if service doesn't exist
|
||||
if !output.status.success() {
|
||||
if stderr.contains("does not exist") || stdout.contains("does not exist") {
|
||||
return Ok(None);
|
||||
}
|
||||
return Err(anyhow::anyhow!("rc-service failed: {}", stderr));
|
||||
}
|
||||
|
||||
// Parse rc-service status output
|
||||
let status_line = stdout.lines().next().unwrap_or("").to_lowercase();
|
||||
|
||||
let (active_state, sub_state, healthy) =
|
||||
if status_line.contains("started") || status_line.contains("running") {
|
||||
("active".to_string(), "running".to_string(), true)
|
||||
} else if status_line.contains("stopped") || status_line.contains("not running") {
|
||||
("inactive".to_string(), "dead".to_string(), false)
|
||||
} else if status_line.contains("crashed") || status_line.contains("failed") {
|
||||
("failed".to_string(), "failed".to_string(), false)
|
||||
} else {
|
||||
("unknown".to_string(), "unknown".to_string(), false)
|
||||
};
|
||||
|
||||
// Check if service is enabled using rc-update
|
||||
let enabled_output = Command::new("rc-update")
|
||||
.args(["show", "default"])
|
||||
.output()
|
||||
.ok();
|
||||
|
||||
let enabled_state = enabled_output
|
||||
.and_then(|o| String::from_utf8(o.stdout).ok())
|
||||
.map(|s| {
|
||||
if s.lines().any(|l| l.trim().starts_with(name)) {
|
||||
"enabled".to_string()
|
||||
} else {
|
||||
"disabled".to_string()
|
||||
}
|
||||
})
|
||||
.unwrap_or_else(|| "unknown".to_string());
|
||||
|
||||
Ok(Some(ServiceStatus {
|
||||
name: name.to_string(),
|
||||
display_name: name.to_string(),
|
||||
active_state,
|
||||
sub_state,
|
||||
load_state: "loaded".to_string(),
|
||||
enabled_state,
|
||||
main_pid: None,
|
||||
healthy,
|
||||
}))
|
||||
}
|
||||
|
||||
impl Default for AptBackend {
|
||||
|
||||
@ -71,3 +71,21 @@
|
||||
**Correction:** Removed sudo from apt command execution in the source code. Service runs as root and can execute apt directly.
|
||||
**Rule:** If a service runs as root, it does not need sudo to execute commands. Remove sudo from command execution.
|
||||
**Status:** Active
|
||||
|
||||
## 2026-05-03 - CapabilityBoundingSet blocks apt sandbox operations
|
||||
**Mistake:** Used CapabilityBoundingSet=CAP_SYS_BOOT which dropped ALL capabilities except SYS_BOOT, blocking apt's _apt sandbox (setuid/setgid/setgroups/chown).
|
||||
**Correction:** Removed CapabilityBoundingSet and AmbientCapabilities entirely. Package management requires full root capabilities. Network security is provided by mTLS + IP whitelist.
|
||||
**Rule:** For package management services running as root, do NOT use CapabilityBoundingSet or AmbientCapabilities. These block apt/dpkg sandbox operations. mTLS + IP whitelist provides network security.
|
||||
**Status:** Active
|
||||
|
||||
## 2026-05-03 - E2E test false positives on status=failed
|
||||
**Mistake:** E2E test accepted status=failed as a valid outcome for install/update/remove operations, masking critical failures.
|
||||
**Correction:** Fixed E2E test to properly FAIL (assert) when status=failed is returned for package operations.
|
||||
**Rule:** E2E tests must assert status=completed for core operations. A failed package install is a 100% total failure of the API's core function.
|
||||
**Status:** Active
|
||||
|
||||
## 2026-05-03 - Systemd sandbox whack-a-mole pattern
|
||||
**Mistake:** Fixed systemd sandbox restrictions one at a time (ProtectSystem → NoNewPrivileges → RestrictSUIDSGID → CapabilityBoundingSet) instead of analyzing all restrictions at once.
|
||||
**Correction:** Removed ALL restrictive sandbox settings at once after understanding that package management requires full system access.
|
||||
**Rule:** When a service fundamentally conflicts with systemd sandboxing, analyze ALL restrictions at once rather than fixing them one at a time. Package management services need: no ProtectSystem=strict, no NoNewPrivileges, no RestrictSUIDSGID, no CapabilityBoundingSet, no AmbientCapabilities restrictions.
|
||||
**Status:** Active
|
||||
|
||||
BIN
tests/e2e/__pycache__/test_e2e.cpython-313.pyc
Normal file
BIN
tests/e2e/__pycache__/test_e2e.cpython-313.pyc
Normal file
Binary file not shown.
@ -298,8 +298,8 @@ def test_get_package_not_found(client: PatchAPIClient) -> str:
|
||||
def test_install_package(client: PatchAPIClient) -> str:
|
||||
"""POST /api/v1/packages - Install a safe test package (hello).
|
||||
|
||||
Note: Install may fail due to service permissions (NoNewPrivileges=true).
|
||||
Both completed and failed are acceptable outcomes.
|
||||
Verifies that the package installation completes successfully.
|
||||
A failed status is a critical failure - the core function must work.
|
||||
"""
|
||||
payload = {
|
||||
"packages": [{"name": TEST_PACKAGE, "version": None}],
|
||||
@ -318,10 +318,7 @@ def test_install_package(client: PatchAPIClient) -> str:
|
||||
# Poll job to completion
|
||||
job_id = data["data"]["job_id"]
|
||||
job = poll_job(client, job_id)
|
||||
# Install may fail due to service permissions - both outcomes acceptable
|
||||
if job["status"] == "failed":
|
||||
return f"Install job completed with status=failed (may be permissions issue): job_id={job_id}, result={job.get('result', {})}"
|
||||
assert job["status"] == "completed", f"Install job unexpected status: {job['status']}"
|
||||
assert job["status"] == "completed", f"Install job failed: status={job['status']}, result={job.get('result', {})}"
|
||||
return f"Installed {TEST_PACKAGE}: job_id={job_id}, status={job['status']}"
|
||||
|
||||
|
||||
@ -336,15 +333,15 @@ def test_update_package(client: PatchAPIClient) -> str:
|
||||
|
||||
job_id = data["data"]["job_id"]
|
||||
job = poll_job(client, job_id)
|
||||
# Update may complete or fail (package already latest or not installed)
|
||||
assert job["status"] in ["completed", "failed"], f"Unexpected job status: {job['status']}"
|
||||
assert job["status"] == "completed", f"Update job failed: status={job['status']}, result={job.get('result', {})}"
|
||||
return f"Updated {TEST_PACKAGE}: job_id={job_id}, status={job['status']}"
|
||||
|
||||
|
||||
def test_remove_package(client: PatchAPIClient) -> str:
|
||||
"""DELETE /api/v1/packages/{name} - Remove the test package.
|
||||
|
||||
Note: Remove may fail if package wasn't installed. Both outcomes acceptable.
|
||||
Verifies that the package removal completes successfully.
|
||||
A failed status is a critical failure - the core function must work.
|
||||
"""
|
||||
resp = client.delete(f"/api/v1/packages/{TEST_PACKAGE}")
|
||||
assert resp.status_code == 202, f"Expected 202, got {resp.status_code}: {resp.text}"
|
||||
@ -355,8 +352,7 @@ def test_remove_package(client: PatchAPIClient) -> str:
|
||||
|
||||
job_id = data["data"]["job_id"]
|
||||
job = poll_job(client, job_id)
|
||||
# Remove may fail if package wasn't installed
|
||||
assert job["status"] in ["completed", "failed"], f"Remove job unexpected status: {job['status']}"
|
||||
assert job["status"] == "completed", f"Remove job failed: status={job['status']}, result={job.get('result', {})}"
|
||||
return f"Removed {TEST_PACKAGE}: job_id={job_id}, status={job['status']}"
|
||||
|
||||
|
||||
@ -568,8 +564,8 @@ def test_wrong_cert_connection(client: PatchAPIClient) -> str:
|
||||
def test_job_lifecycle(client: PatchAPIClient) -> str:
|
||||
"""Full job lifecycle: install -> get job -> list jobs -> remove.
|
||||
|
||||
Accepts both completed and failed outcomes for install/remove
|
||||
since service may have permission restrictions.
|
||||
Verifies that install and remove both complete successfully.
|
||||
A failed status is a critical failure - the core function must work.
|
||||
"""
|
||||
# Step 1: Install test package
|
||||
payload = {
|
||||
@ -589,7 +585,7 @@ def test_job_lifecycle(client: PatchAPIClient) -> str:
|
||||
|
||||
# Step 3: Poll to completion
|
||||
job = poll_job(client, job_id)
|
||||
assert job["status"] in ["completed", "failed"], f"Install job unexpected status: {job['status']}"
|
||||
assert job["status"] == "completed", f"Install job failed: status={job['status']}, result={job.get('result', {})}"
|
||||
|
||||
# Step 4: Verify in job list
|
||||
resp = client.get("/api/v1/jobs?limit=50", timeout=120)
|
||||
@ -603,11 +599,42 @@ def test_job_lifecycle(client: PatchAPIClient) -> str:
|
||||
assert resp.status_code == 202, f"Remove failed: HTTP {resp.status_code}"
|
||||
remove_job_id = resp.json()["data"]["job_id"]
|
||||
remove_job = poll_job(client, remove_job_id)
|
||||
assert remove_job["status"] in ["completed", "failed"], f"Remove job unexpected status: {remove_job['status']}"
|
||||
assert remove_job["status"] == "completed", f"Remove job failed: status={remove_job['status']}, result={remove_job.get('result', {})}"
|
||||
|
||||
return f"Full lifecycle OK: install job={job_id}, remove job={remove_job_id}"
|
||||
|
||||
|
||||
def test_service_status(client: PatchAPIClient) -> str:
|
||||
"""GET /api/v1/system/services/{name} - Test service status endpoint."""
|
||||
# Test with a known service (ssh)
|
||||
resp = client.get("/api/v1/system/services/ssh")
|
||||
assert resp.status_code == 200, f"Expected 200, got {resp.status_code}: {resp.text}"
|
||||
data = resp.json()
|
||||
err = validate_envelope(data, "service_status")
|
||||
assert err is None, f"Envelope validation failed: {err}"
|
||||
assert data["success"] is True
|
||||
assert "name" in data["data"], "Missing name field"
|
||||
assert "active_state" in data["data"], "Missing active_state field"
|
||||
assert "healthy" in data["data"], "Missing healthy field"
|
||||
assert isinstance(data["data"]["healthy"], bool), "healthy must be boolean"
|
||||
|
||||
# Test with non-existent service
|
||||
resp = client.get("/api/v1/system/services/nonexistent-service-12345")
|
||||
assert resp.status_code == 404, f"Expected 404, got {resp.status_code}"
|
||||
data = resp.json()
|
||||
assert data["success"] is False
|
||||
assert data["error"]["code"] == "SERVICE_NOT_FOUND"
|
||||
|
||||
# Test with invalid service name
|
||||
resp = client.get("/api/v1/system/services/../../etc/passwd")
|
||||
assert resp.status_code == 400, f"Expected 400, got {resp.status_code}"
|
||||
data = resp.json()
|
||||
assert data["success"] is False
|
||||
assert data["error"]["code"] == "INVALID_SERVICE_NAME"
|
||||
|
||||
return f"Service status OK: ssh={data['data']['name']}, state={data['data']['active_state']}, healthy={data['data']['healthy']}"
|
||||
|
||||
|
||||
def test_reboot_endpoint(client: PatchAPIClient) -> str:
|
||||
"""POST /api/v1/system/reboot - Test reboot endpoint.
|
||||
|
||||
@ -657,6 +684,7 @@ def run_all_tests(target_key: str, skip_reboot: bool = False, verbose: bool = Fa
|
||||
print("\n--- Health & System ---")
|
||||
run_test(results, "Health Check", test_health_endpoint, client)
|
||||
run_test(results, "System Info", test_system_info, client)
|
||||
run_test(results, "Service Status (ssh)", test_service_status, client)
|
||||
|
||||
# ---- Category 2: Package Operations ----
|
||||
print("\n--- Package Operations ---")
|
||||
|
||||
Reference in New Issue
Block a user