Private
Public Access
1
0

feat: add auto-enrollment, cert validation, and crash loop fixes

- Auto-enrollment on startup when certs are missing/invalid and enrollment.manager_url configured
- Certificate validation (existence, parse, expiry, key match, CA trust)
- --enroll exits after completion (no port conflict with systemd service)
- --renew-certs flag for manual cert renewal
- SO_REUSEADDR on TcpListener::bind (prevents Address already in use)
- Polling token persistence for enrollment resume after restart
- Exit code strategy (0=clean, 1=error, 2=enrollment in progress)
- HTTP 409 (host already exists) handling during enrollment
- Move 'Listening on' log after actual bind
- Increase RestartSec to 10s and add StartLimitBurst=5
- Postinst checks for certs and enrollment URL, prints guidance
- EnrollmentConfig.manager_url changed to Option<String>
- cert_renewal_threshold_days and polling_token config fields
- Updated SPEC.md and DEPLOYMENT_GUIDE.md with new workflow
- RCA document for crash loop root cause analysis
- Version bumped to 1.2.0
This commit is contained in:
2026-05-29 10:44:42 -05:00
parent 48ec57581e
commit 1322598581
43 changed files with 1364 additions and 974 deletions

View File

@ -12,17 +12,23 @@
//! - mTLS authentication required on port 12443
//! - IP whitelist enforced (deny by default)
//! - Detailed audit logging
//!
//! # Exit Codes
//!
//! - 0: Clean exit (certs missing/invalid + no enrollment URL, or --enroll/--renew-certs success)
//! - 1: Error (config error, enrollment network failure, cert validation error)
//! - 2: Certs invalid, auto-enrollment in progress (triggers systemd restart with backoff)
use actix_web::middleware::Logger;
use actix_web::{web, App, HttpServer};
use anyhow::Result;
use clap::Parser;
use std::net::TcpListener;
use std::sync::Arc;
use tracing::{error, info, warn};
use linux_patch_api::api::{configure_api_routes, configure_health_route};
use linux_patch_api::auth::{mtls, MtlsMiddleware, WhitelistManager};
use linux_patch_api::config::loader::{validate_certs, CertStatus};
use linux_patch_api::enroll;
use linux_patch_api::packages::cache::PackageCacheState;
use linux_patch_api::packages::create_backend;
@ -42,12 +48,29 @@ struct Args {
#[arg(short, long)]
verbose: bool,
/// Enroll with manager at URL (skips mTLS startup, runs enrollment flow only)
/// Enroll with manager at URL (skips mTLS startup, runs enrollment flow only, then exits)
#[arg(
long,
help = "Enroll with manager at URL (skips mTLS startup, runs enrollment flow only)"
help = "Enroll with manager at URL (skips mTLS startup, runs enrollment flow only, then exits)"
)]
enroll: Option<String>,
/// Validate existing certs and re-enroll if expiring within threshold or invalid
#[arg(
long,
help = "Validate existing certs and re-enroll if expiring within threshold or invalid, then exits"
)]
renew_certs: bool,
}
/// Process exit codes for the daemon.
///
/// Each variant carries an explicit discriminant and is converted with
/// `as i32` at the call site before being handed to `std::process::exit`.
/// `#[repr(i32)]` pins the underlying representation to the type used in
/// those casts.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(i32)]
enum ExitCode {
    /// Clean exit: certs missing/invalid + no enrollment URL, or
    /// --enroll/--renew-certs success
    Clean = 0,
    /// Error: config error, enrollment network failure, cert validation error
    Error = 1,
    /// Certs invalid, auto-enrollment in progress (triggers systemd restart with backoff)
    EnrollmentInProgress = 2,
}
#[actix_web::main]
@ -69,8 +92,9 @@ async fn main() -> Result<()> {
"Linux Patch API starting"
);
// Load configuration
let config = match AppConfig::load(&args.config, args.enroll.is_some()) {
// Load configuration (skip TLS validation during enrollment mode)
let skip_tls_validation = args.enroll.is_some();
let mut config = match AppConfig::load(&args.config, skip_tls_validation) {
Ok(cfg) => {
info!(
port = cfg.server.port,
@ -81,23 +105,142 @@ async fn main() -> Result<()> {
}
Err(e) => {
error!(error = %e, path = args.config, "Failed to load configuration");
return Err(anyhow::anyhow!("Configuration error: {}", e));
std::process::exit(ExitCode::Error as i32);
}
};
// Handle enrollment mode - runs before server startup
// Handle --renew-certs flag: validate certs and re-enroll if needed
if args.renew_certs {
info!("Certificate renewal mode activated - validating existing certificates");
match validate_certs(&config) {
Ok(CertStatus::Valid) => {
info!("Certificates are valid and not expiring soon. No renewal needed.");
std::process::exit(ExitCode::Clean as i32);
}
Ok(CertStatus::ExpiringSoon { not_after }) => {
info!(
not_after = %not_after,
"Certificates expiring soon - starting re-enrollment"
);
}
Ok(status) => {
info!(
status = %status,
"Certificates are {} - starting re-enrollment",
status
);
}
Err(e) => {
error!(error = %e, "Certificate validation failed");
std::process::exit(ExitCode::Error as i32);
}
}
// Need enrollment URL to re-enroll
let manager_url = match config.enrollment_manager_url() {
Some(url) => url.to_string(),
None => {
error!(
"Cannot re-enroll: enrollment.manager_url not configured. \
Add the manager URL to config.yaml or use --enroll <url>"
);
std::process::exit(ExitCode::Error as i32);
}
};
match enroll::run_enrollment(&manager_url, &mut config, &args.config).await {
Ok(()) => {
info!("Certificate renewal complete. Start service: systemctl start linux-patch-api");
std::process::exit(ExitCode::Clean as i32);
}
Err(e) => {
error!(error = %e, "Certificate renewal failed");
std::process::exit(ExitCode::Error as i32);
}
}
}
// Handle --enroll flag: run enrollment flow then EXIT
if let Some(ref manager_url) = args.enroll {
info!(
manager_url = manager_url,
"Enrollment mode activated - running enrollment flow before server startup"
"Enrollment mode activated - running enrollment flow"
);
match enroll::run_enrollment(manager_url, &config).await {
match enroll::run_enrollment(manager_url, &mut config, &args.config).await {
Ok(()) => {
info!("Enrollment complete - proceeding to server startup");
info!("Enrollment complete. Start service: systemctl start linux-patch-api");
std::process::exit(ExitCode::Clean as i32);
}
Err(e) => {
error!(error = %e, "Enrollment failed - shutting down");
return Err(anyhow::anyhow!("Enrollment failed: {}", e));
error!(error = %e, "Enrollment failed");
std::process::exit(ExitCode::Error as i32);
}
}
}
// Auto-enrollment on startup: validate certs before starting server
if config.tls_config().is_some() {
match validate_certs(&config) {
Ok(CertStatus::Valid) => {
info!("TLS certificates validated successfully");
}
Ok(CertStatus::ExpiringSoon { not_after }) => {
warn!(
not_after = %not_after,
"Certificates expiring soon - starting normally, consider re-enrollment"
);
// TODO: Schedule background re-enrollment in future phase
}
Ok(status @ CertStatus::Missing { .. })
| Ok(status @ CertStatus::Corrupt { .. })
| Ok(status @ CertStatus::Expired { .. })
| Ok(status @ CertStatus::KeyMismatch)
| Ok(status @ CertStatus::Untrusted) => {
// Certs are invalid - check if we can auto-enroll
// Clone the manager URL before mutable borrow of config
let manager_url_opt = config.enrollment_manager_url().map(|s| s.to_string());
match manager_url_opt {
Some(manager_url) => {
info!(
status = %status,
manager_url = manager_url,
"Certs {}. Auto-enrolling with {}",
status,
manager_url
);
match enroll::run_enrollment(&manager_url, &mut config, &args.config).await {
Ok(()) => {
info!("Auto-enrollment complete - continuing to server startup");
// Re-load config to pick up any changes from enrollment
config = AppConfig::load(&args.config, false)?;
}
Err(e) => {
error!(
error = %e,
"Auto-enrollment failed - will retry on next restart"
);
std::process::exit(ExitCode::EnrollmentInProgress as i32);
}
}
}
None => {
// No enrollment URL configured - exit cleanly to avoid crash loop
error!(
status = %status,
"Certs {}. No enrollment URL configured. \
To fix this, either:\n\
1. Add enrollment.manager_url to config.yaml and restart\n\
2. Run: linux-patch-api --enroll <manager_url>\n\
3. Place certificates manually in the configured paths",
status
);
std::process::exit(ExitCode::Clean as i32);
}
}
}
Err(e) => {
error!(error = %e, "Certificate validation error");
std::process::exit(ExitCode::Error as i32);
}
}
}
@ -153,9 +296,7 @@ async fn main() -> Result<()> {
// Configure bind address
let bind_address = format!("{}:{}", config.server.bind, config.server.port);
info!(bind = %bind_address, "Starting HTTP server");
// Create server
// Create server builder
let server_builder = HttpServer::new(move || {
let mut app = App::new()
@ -175,7 +316,6 @@ async fn main() -> Result<()> {
});
// Configure health route (outside API scope)
// cache_state and backend are available via app_data registered above
app = app.configure(configure_health_route);
app
@ -194,7 +334,6 @@ async fn main() -> Result<()> {
);
info!("Linux Patch API initialized successfully");
info!("Listening on {}", bind_address);
// Apply TLS/mTLS configuration if enabled
if let Some(tls_config) = config.tls_config() {
@ -222,11 +361,37 @@ async fn main() -> Result<()> {
info!("mTLS middleware and rustls config initialized successfully");
// Create TCP listener (std::net for listen_rustls_0_23)
let tcp_listener = TcpListener::bind(&bind_address)
.map_err(|e| anyhow::anyhow!("Failed to bind to {}: {}", bind_address, e))?;
// Create TCP listener with SO_REUSEADDR using socket2
// This prevents "Address already in use" errors when restarting after a crash
let socket = socket2::Socket::new(
socket2::Domain::IPV4,
socket2::Type::STREAM,
Some(socket2::Protocol::TCP),
)
.map_err(|e| anyhow::anyhow!("Failed to create socket: {}", e))?;
info!("TCP listener bound to {}", bind_address);
socket
.set_reuse_address(true)
.map_err(|e| anyhow::anyhow!("Failed to set SO_REUSEADDR: {}", e))?;
let bind_addr: std::net::SocketAddr = bind_address
.parse()
.map_err(|e| anyhow::anyhow!("Invalid bind address '{}': {}", bind_address, e))?;
socket
.bind(&socket2::SockAddr::from(bind_addr))
.map_err(|e| {
anyhow::anyhow!("Failed to bind socket to {}: {}", bind_address, e)
})?;
socket
.listen(128)
.map_err(|e| anyhow::anyhow!("Failed to listen on socket: {}", e))?;
let tcp_listener: std::net::TcpListener = socket.into();
// Log listening AFTER successful bind
info!("Listening on {} (mTLS enabled)", bind_address);
// Clone the ServerConfig from Arc for listen_rustls_0_23
let server_config = (*rustls_config).clone();
@ -245,8 +410,37 @@ async fn main() -> Result<()> {
}
}
} else {
// Create TCP listener with SO_REUSEADDR for non-TLS mode
let socket = socket2::Socket::new(
socket2::Domain::IPV4,
socket2::Type::STREAM,
Some(socket2::Protocol::TCP),
)
.map_err(|e| anyhow::anyhow!("Failed to create socket: {}", e))?;
socket
.set_reuse_address(true)
.map_err(|e| anyhow::anyhow!("Failed to set SO_REUSEADDR: {}", e))?;
let bind_addr: std::net::SocketAddr = bind_address
.parse()
.map_err(|e| anyhow::anyhow!("Invalid bind address '{}': {}", bind_address, e))?;
socket
.bind(&socket2::SockAddr::from(bind_addr))
.map_err(|e| anyhow::anyhow!("Failed to bind socket to {}: {}", bind_address, e))?;
socket
.listen(128)
.map_err(|e| anyhow::anyhow!("Failed to listen on socket: {}", e))?;
let tcp_listener: std::net::TcpListener = socket.into();
// Log listening AFTER successful bind
info!("Listening on {} (no TLS)", bind_address);
warn!("TLS is disabled - running without mTLS authentication (INSECURE)");
server_builder.bind(&bind_address)?.run().await?;
server_builder.listen(tcp_listener)?.run().await?;
}
info!("Linux Patch API shutting down");