feat: add auto-enrollment, cert validation, and crash loop fixes
- Auto-enrollment on startup when certs are missing/invalid and enrollment.manager_url configured
- Certificate validation (existence, parse, expiry, key match, CA trust)
- --enroll exits after completion (no port conflict with systemd service)
- --renew-certs flag for manual cert renewal
- SO_REUSEADDR on TcpListener::bind (prevents Address already in use)
- Polling token persistence for enrollment resume after restart
- Exit code strategy (0=clean, 1=error, 2=enrollment in progress)
- HTTP 409 (host already exists) handling during enrollment
- Move 'Listening on' log after actual bind
- Increase RestartSec to 10s and add StartLimitBurst=5
- Postinst checks for certs and enrollment URL, prints guidance
- EnrollmentConfig.manager_url changed to Option<String>
- cert_renewal_threshold_days and polling_token config fields
- Updated SPEC.md and DEPLOYMENT_GUIDE.md with new workflow
- RCA document for crash loop root cause analysis
- Version bumped to 1.2.0
This commit is contained in:
236
src/main.rs
236
src/main.rs
@ -12,17 +12,23 @@
|
||||
//! - mTLS authentication required on port 12443
|
||||
//! - IP whitelist enforced (deny by default)
|
||||
//! - Detailed audit logging
|
||||
//!
|
||||
//! # Exit Codes
|
||||
//!
|
||||
//! - 0: Clean exit (no certs + no enrollment URL, or --enroll/--renew-certs success)
|
||||
//! - 1: Error (config error, enrollment network failure, cert validation error)
|
||||
//! - 2: Certs invalid, auto-enrollment in progress (triggers systemd restart with backoff)
|
||||
|
||||
use actix_web::middleware::Logger;
|
||||
use actix_web::{web, App, HttpServer};
|
||||
use anyhow::Result;
|
||||
use clap::Parser;
|
||||
use std::net::TcpListener;
|
||||
use std::sync::Arc;
|
||||
use tracing::{error, info, warn};
|
||||
|
||||
use linux_patch_api::api::{configure_api_routes, configure_health_route};
|
||||
use linux_patch_api::auth::{mtls, MtlsMiddleware, WhitelistManager};
|
||||
use linux_patch_api::config::loader::{validate_certs, CertStatus};
|
||||
use linux_patch_api::enroll;
|
||||
use linux_patch_api::packages::cache::PackageCacheState;
|
||||
use linux_patch_api::packages::create_backend;
|
||||
@ -42,12 +48,29 @@ struct Args {
|
||||
#[arg(short, long)]
|
||||
verbose: bool,
|
||||
|
||||
/// Enroll with manager at URL (skips mTLS startup, runs enrollment flow only)
|
||||
/// Enroll with manager at URL (skips mTLS startup, runs enrollment flow only, then exits)
|
||||
#[arg(
|
||||
long,
|
||||
help = "Enroll with manager at URL (skips mTLS startup, runs enrollment flow only)"
|
||||
help = "Enroll with manager at URL (skips mTLS startup, runs enrollment flow only, then exits)"
|
||||
)]
|
||||
enroll: Option<String>,
|
||||
|
||||
/// Validate existing certs and re-enroll if expiring within threshold or invalid
|
||||
#[arg(
|
||||
long,
|
||||
help = "Validate existing certs and re-enroll if expiring within threshold or invalid, then exits"
|
||||
)]
|
||||
renew_certs: bool,
|
||||
}
|
||||
|
||||
/// Process exit codes for the daemon.
///
/// Each variant's discriminant is the numeric status passed to
/// `std::process::exit` (call sites cast with `as i32`). The explicit
/// `#[repr(i32)]` pins the underlying integer type to match that cast.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(i32)]
enum ExitCode {
    /// Clean exit: no certs + no enrollment URL, or --enroll/--renew-certs success
    Clean = 0,
    /// Error: config error, enrollment network failure, cert validation error
    Error = 1,
    /// Certs invalid, auto-enrollment in progress (triggers systemd restart with backoff)
    EnrollmentInProgress = 2,
}
|
||||
|
||||
#[actix_web::main]
|
||||
@ -69,8 +92,9 @@ async fn main() -> Result<()> {
|
||||
"Linux Patch API starting"
|
||||
);
|
||||
|
||||
// Load configuration
|
||||
let config = match AppConfig::load(&args.config, args.enroll.is_some()) {
|
||||
// Load configuration (skip TLS validation during enrollment mode)
|
||||
let skip_tls_validation = args.enroll.is_some();
|
||||
let mut config = match AppConfig::load(&args.config, skip_tls_validation) {
|
||||
Ok(cfg) => {
|
||||
info!(
|
||||
port = cfg.server.port,
|
||||
@ -81,23 +105,142 @@ async fn main() -> Result<()> {
|
||||
}
|
||||
Err(e) => {
|
||||
error!(error = %e, path = args.config, "Failed to load configuration");
|
||||
return Err(anyhow::anyhow!("Configuration error: {}", e));
|
||||
std::process::exit(ExitCode::Error as i32);
|
||||
}
|
||||
};
|
||||
|
||||
// Handle enrollment mode - runs before server startup
|
||||
// Handle --renew-certs flag: validate certs and re-enroll if needed
|
||||
if args.renew_certs {
|
||||
info!("Certificate renewal mode activated - validating existing certificates");
|
||||
match validate_certs(&config) {
|
||||
Ok(CertStatus::Valid) => {
|
||||
info!("Certificates are valid and not expiring soon. No renewal needed.");
|
||||
std::process::exit(ExitCode::Clean as i32);
|
||||
}
|
||||
Ok(CertStatus::ExpiringSoon { not_after }) => {
|
||||
info!(
|
||||
not_after = %not_after,
|
||||
"Certificates expiring soon - starting re-enrollment"
|
||||
);
|
||||
}
|
||||
Ok(status) => {
|
||||
info!(
|
||||
status = %status,
|
||||
"Certificates are {} - starting re-enrollment",
|
||||
status
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
error!(error = %e, "Certificate validation failed");
|
||||
std::process::exit(ExitCode::Error as i32);
|
||||
}
|
||||
}
|
||||
|
||||
// Need enrollment URL to re-enroll
|
||||
let manager_url = match config.enrollment_manager_url() {
|
||||
Some(url) => url.to_string(),
|
||||
None => {
|
||||
error!(
|
||||
"Cannot re-enroll: enrollment.manager_url not configured. \
|
||||
Add the manager URL to config.yaml or use --enroll <url>"
|
||||
);
|
||||
std::process::exit(ExitCode::Error as i32);
|
||||
}
|
||||
};
|
||||
|
||||
match enroll::run_enrollment(&manager_url, &mut config, &args.config).await {
|
||||
Ok(()) => {
|
||||
info!("Certificate renewal complete. Start service: systemctl start linux-patch-api");
|
||||
std::process::exit(ExitCode::Clean as i32);
|
||||
}
|
||||
Err(e) => {
|
||||
error!(error = %e, "Certificate renewal failed");
|
||||
std::process::exit(ExitCode::Error as i32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle --enroll flag: run enrollment flow then EXIT
|
||||
if let Some(ref manager_url) = args.enroll {
|
||||
info!(
|
||||
manager_url = manager_url,
|
||||
"Enrollment mode activated - running enrollment flow before server startup"
|
||||
"Enrollment mode activated - running enrollment flow"
|
||||
);
|
||||
match enroll::run_enrollment(manager_url, &config).await {
|
||||
match enroll::run_enrollment(manager_url, &mut config, &args.config).await {
|
||||
Ok(()) => {
|
||||
info!("Enrollment complete - proceeding to server startup");
|
||||
info!("Enrollment complete. Start service: systemctl start linux-patch-api");
|
||||
std::process::exit(ExitCode::Clean as i32);
|
||||
}
|
||||
Err(e) => {
|
||||
error!(error = %e, "Enrollment failed - shutting down");
|
||||
return Err(anyhow::anyhow!("Enrollment failed: {}", e));
|
||||
error!(error = %e, "Enrollment failed");
|
||||
std::process::exit(ExitCode::Error as i32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Auto-enrollment on startup: validate certs before starting server
|
||||
if config.tls_config().is_some() {
|
||||
match validate_certs(&config) {
|
||||
Ok(CertStatus::Valid) => {
|
||||
info!("TLS certificates validated successfully");
|
||||
}
|
||||
Ok(CertStatus::ExpiringSoon { not_after }) => {
|
||||
warn!(
|
||||
not_after = %not_after,
|
||||
"Certificates expiring soon - starting normally, consider re-enrollment"
|
||||
);
|
||||
// TODO: Schedule background re-enrollment in future phase
|
||||
}
|
||||
Ok(status @ CertStatus::Missing { .. })
|
||||
| Ok(status @ CertStatus::Corrupt { .. })
|
||||
| Ok(status @ CertStatus::Expired { .. })
|
||||
| Ok(status @ CertStatus::KeyMismatch)
|
||||
| Ok(status @ CertStatus::Untrusted) => {
|
||||
// Certs are invalid - check if we can auto-enroll
|
||||
// Clone the manager URL before mutable borrow of config
|
||||
let manager_url_opt = config.enrollment_manager_url().map(|s| s.to_string());
|
||||
match manager_url_opt {
|
||||
Some(manager_url) => {
|
||||
info!(
|
||||
status = %status,
|
||||
manager_url = manager_url,
|
||||
"Certs {}. Auto-enrolling with {}",
|
||||
status,
|
||||
manager_url
|
||||
);
|
||||
match enroll::run_enrollment(&manager_url, &mut config, &args.config).await {
|
||||
Ok(()) => {
|
||||
info!("Auto-enrollment complete - continuing to server startup");
|
||||
// Re-load config to pick up any changes from enrollment
|
||||
config = AppConfig::load(&args.config, false)?;
|
||||
}
|
||||
Err(e) => {
|
||||
error!(
|
||||
error = %e,
|
||||
"Auto-enrollment failed - will retry on next restart"
|
||||
);
|
||||
std::process::exit(ExitCode::EnrollmentInProgress as i32);
|
||||
}
|
||||
}
|
||||
}
|
||||
None => {
|
||||
// No enrollment URL configured - exit cleanly to avoid crash loop
|
||||
error!(
|
||||
status = %status,
|
||||
"Certs {}. No enrollment URL configured. \
|
||||
To fix this, either:\n\
|
||||
1. Add enrollment.manager_url to config.yaml and restart\n\
|
||||
2. Run: linux-patch-api --enroll <manager_url>\n\
|
||||
3. Place certificates manually in the configured paths",
|
||||
status
|
||||
);
|
||||
std::process::exit(ExitCode::Clean as i32);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!(error = %e, "Certificate validation error");
|
||||
std::process::exit(ExitCode::Error as i32);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -153,9 +296,7 @@ async fn main() -> Result<()> {
|
||||
|
||||
// Configure bind address
|
||||
let bind_address = format!("{}:{}", config.server.bind, config.server.port);
|
||||
info!(bind = %bind_address, "Starting HTTP server");
|
||||
|
||||
// Create server
|
||||
// Create server builder
|
||||
let server_builder = HttpServer::new(move || {
|
||||
let mut app = App::new()
|
||||
@ -175,7 +316,6 @@ async fn main() -> Result<()> {
|
||||
});
|
||||
|
||||
// Configure health route (outside API scope)
|
||||
// cache_state and backend are available via app_data registered above
|
||||
app = app.configure(configure_health_route);
|
||||
|
||||
app
|
||||
@ -194,7 +334,6 @@ async fn main() -> Result<()> {
|
||||
);
|
||||
|
||||
info!("Linux Patch API initialized successfully");
|
||||
info!("Listening on {}", bind_address);
|
||||
|
||||
// Apply TLS/mTLS configuration if enabled
|
||||
if let Some(tls_config) = config.tls_config() {
|
||||
@ -222,11 +361,37 @@ async fn main() -> Result<()> {
|
||||
|
||||
info!("mTLS middleware and rustls config initialized successfully");
|
||||
|
||||
// Create TCP listener (std::net for listen_rustls_0_23)
|
||||
let tcp_listener = TcpListener::bind(&bind_address)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to bind to {}: {}", bind_address, e))?;
|
||||
// Create TCP listener with SO_REUSEADDR using socket2
|
||||
// This prevents "Address already in use" errors when restarting after a crash
|
||||
let socket = socket2::Socket::new(
|
||||
socket2::Domain::IPV4,
|
||||
socket2::Type::STREAM,
|
||||
Some(socket2::Protocol::TCP),
|
||||
)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to create socket: {}", e))?;
|
||||
|
||||
info!("TCP listener bound to {}", bind_address);
|
||||
socket
|
||||
.set_reuse_address(true)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to set SO_REUSEADDR: {}", e))?;
|
||||
|
||||
let bind_addr: std::net::SocketAddr = bind_address
|
||||
.parse()
|
||||
.map_err(|e| anyhow::anyhow!("Invalid bind address '{}': {}", bind_address, e))?;
|
||||
|
||||
socket
|
||||
.bind(&socket2::SockAddr::from(bind_addr))
|
||||
.map_err(|e| {
|
||||
anyhow::anyhow!("Failed to bind socket to {}: {}", bind_address, e)
|
||||
})?;
|
||||
|
||||
socket
|
||||
.listen(128)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to listen on socket: {}", e))?;
|
||||
|
||||
let tcp_listener: std::net::TcpListener = socket.into();
|
||||
|
||||
// Log listening AFTER successful bind
|
||||
info!("Listening on {} (mTLS enabled)", bind_address);
|
||||
|
||||
// Clone the ServerConfig from Arc for listen_rustls_0_23
|
||||
let server_config = (*rustls_config).clone();
|
||||
@ -245,8 +410,37 @@ async fn main() -> Result<()> {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Create TCP listener with SO_REUSEADDR for non-TLS mode
|
||||
let socket = socket2::Socket::new(
|
||||
socket2::Domain::IPV4,
|
||||
socket2::Type::STREAM,
|
||||
Some(socket2::Protocol::TCP),
|
||||
)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to create socket: {}", e))?;
|
||||
|
||||
socket
|
||||
.set_reuse_address(true)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to set SO_REUSEADDR: {}", e))?;
|
||||
|
||||
let bind_addr: std::net::SocketAddr = bind_address
|
||||
.parse()
|
||||
.map_err(|e| anyhow::anyhow!("Invalid bind address '{}': {}", bind_address, e))?;
|
||||
|
||||
socket
|
||||
.bind(&socket2::SockAddr::from(bind_addr))
|
||||
.map_err(|e| anyhow::anyhow!("Failed to bind socket to {}: {}", bind_address, e))?;
|
||||
|
||||
socket
|
||||
.listen(128)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to listen on socket: {}", e))?;
|
||||
|
||||
let tcp_listener: std::net::TcpListener = socket.into();
|
||||
|
||||
// Log listening AFTER successful bind
|
||||
info!("Listening on {} (no TLS)", bind_address);
|
||||
|
||||
warn!("TLS is disabled - running without mTLS authentication (INSECURE)");
|
||||
server_builder.bind(&bind_address)?.run().await?;
|
||||
server_builder.listen(tcp_listener)?.run().await?;
|
||||
}
|
||||
|
||||
info!("Linux Patch API shutting down");
|
||||
|
||||
Reference in New Issue
Block a user