feat: implement pod-based scanning architecture

This major refactor replaces synchronous, subprocess-based scanning inside the
operator process with asynchronous pod-based scanning using Kubernetes Jobs.

## Architecture Changes
- Scans now run as Kubernetes Jobs with ttlSecondsAfterFinished set for automatic cleanup
- Jobs have owner references for garbage collection when NucleiScan is deleted
- Configurable concurrency limits, timeouts, and resource requirements

## New Features
- Dual-mode binary: --mode=controller (default) or --mode=scanner
- Annotation-based configuration for Ingress/VirtualService resources
- Operator-level configuration via environment variables
- Startup recovery for orphaned scans after operator restart
- Periodic cleanup of stuck jobs

## New Files
- DESIGN.md: Comprehensive architecture design document
- internal/jobmanager/: Job Manager for creating/monitoring scanner jobs
- internal/scanner/runner.go: Scanner mode implementation
- internal/annotations/: Annotation parsing utilities
- charts/nuclei-operator/templates/scanner-rbac.yaml: Scanner RBAC

## API Changes
- Added ScannerConfig struct for per-scan scanner configuration
- Added JobReference struct for tracking scanner jobs
- Added ScannerConfig field to NucleiScanSpec
- Added JobRef and ScanStartTime fields to NucleiScanStatus

## Supported Annotations
- nuclei.homelab.mortenolsen.pro/enabled
- nuclei.homelab.mortenolsen.pro/templates
- nuclei.homelab.mortenolsen.pro/severity
- nuclei.homelab.mortenolsen.pro/schedule
- nuclei.homelab.mortenolsen.pro/timeout
- nuclei.homelab.mortenolsen.pro/scanner-image

## RBAC Updates
- Added Job and Pod permissions for operator
- Created separate scanner service account with minimal permissions

## Documentation
- Updated README, user-guide, api.md, and Helm chart README
- Added example annotated Ingress resources
Author: Morten Olsen
Date: 2025-12-12 20:51:23 +01:00
Parent: 519ed32de3
Commit: 335689da22
22 changed files with 3060 additions and 245 deletions


@@ -0,0 +1,211 @@
/*
Copyright 2024.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package annotations
import (
"strconv"
"strings"
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
nucleiv1alpha1 "github.com/mortenolsen/nuclei-operator/api/v1alpha1"
)
const (
// AnnotationPrefix is the prefix for all nuclei annotations
AnnotationPrefix = "nuclei.homelab.mortenolsen.pro/"
// AnnotationEnabled controls whether scanning is enabled for a resource
AnnotationEnabled = AnnotationPrefix + "enabled"
// AnnotationTemplates specifies comma-separated template paths or tags
AnnotationTemplates = AnnotationPrefix + "templates"
// AnnotationSeverity specifies comma-separated severity filter
AnnotationSeverity = AnnotationPrefix + "severity"
// AnnotationSchedule specifies the cron schedule for periodic scans
AnnotationSchedule = AnnotationPrefix + "schedule"
// AnnotationTimeout specifies the scan timeout
AnnotationTimeout = AnnotationPrefix + "timeout"
// AnnotationScannerImage overrides the scanner image
AnnotationScannerImage = AnnotationPrefix + "scanner-image"
// AnnotationExcludeTemplates specifies templates to exclude
AnnotationExcludeTemplates = AnnotationPrefix + "exclude-templates"
// AnnotationRateLimit specifies requests per second limit
AnnotationRateLimit = AnnotationPrefix + "rate-limit"
// AnnotationTags specifies template tags to include
AnnotationTags = AnnotationPrefix + "tags"
// AnnotationExcludeTags specifies template tags to exclude
AnnotationExcludeTags = AnnotationPrefix + "exclude-tags"
)
// ScanConfig holds parsed annotation configuration
type ScanConfig struct {
// Enabled indicates if scanning is enabled
Enabled bool
// Templates to use for scanning
Templates []string
// Severity filter
Severity []string
// Schedule for periodic scans (cron format)
Schedule string
// Timeout for the scan
Timeout *metav1.Duration
// ScannerImage overrides the default scanner image
ScannerImage string
// ExcludeTemplates to exclude from scanning
ExcludeTemplates []string
// RateLimit for requests per second
RateLimit int
// Tags to include
Tags []string
// ExcludeTags to exclude
ExcludeTags []string
}
// ParseAnnotations extracts scan configuration from resource annotations
func ParseAnnotations(annotations map[string]string) *ScanConfig {
config := &ScanConfig{
Enabled: true, // Default to enabled
}
if annotations == nil {
return config
}
// Parse enabled
if v, ok := annotations[AnnotationEnabled]; ok {
config.Enabled = strings.ToLower(v) == "true"
}
// Parse templates
if v, ok := annotations[AnnotationTemplates]; ok && v != "" {
config.Templates = splitAndTrim(v)
}
// Parse severity
if v, ok := annotations[AnnotationSeverity]; ok && v != "" {
config.Severity = splitAndTrim(v)
}
// Parse schedule
if v, ok := annotations[AnnotationSchedule]; ok {
config.Schedule = v
}
// Parse timeout
if v, ok := annotations[AnnotationTimeout]; ok {
if d, err := time.ParseDuration(v); err == nil {
config.Timeout = &metav1.Duration{Duration: d}
}
}
// Parse scanner image
if v, ok := annotations[AnnotationScannerImage]; ok {
config.ScannerImage = v
}
// Parse exclude templates
if v, ok := annotations[AnnotationExcludeTemplates]; ok && v != "" {
config.ExcludeTemplates = splitAndTrim(v)
}
// Parse rate limit
if v, ok := annotations[AnnotationRateLimit]; ok {
if n, err := strconv.Atoi(v); err == nil {
config.RateLimit = n
}
}
// Parse tags
if v, ok := annotations[AnnotationTags]; ok && v != "" {
config.Tags = splitAndTrim(v)
}
// Parse exclude tags
if v, ok := annotations[AnnotationExcludeTags]; ok && v != "" {
config.ExcludeTags = splitAndTrim(v)
}
return config
}
// ApplyToNucleiScanSpec applies the annotation config to a NucleiScan spec
func (c *ScanConfig) ApplyToNucleiScanSpec(spec *nucleiv1alpha1.NucleiScanSpec) {
// Apply templates if specified
if len(c.Templates) > 0 {
spec.Templates = c.Templates
}
// Apply severity if specified
if len(c.Severity) > 0 {
spec.Severity = c.Severity
}
// Apply schedule if specified
if c.Schedule != "" {
spec.Schedule = c.Schedule
}
// Apply scanner config if any scanner-specific settings are specified
if c.ScannerImage != "" || c.Timeout != nil {
if spec.ScannerConfig == nil {
spec.ScannerConfig = &nucleiv1alpha1.ScannerConfig{}
}
if c.ScannerImage != "" {
spec.ScannerConfig.Image = c.ScannerImage
}
if c.Timeout != nil {
spec.ScannerConfig.Timeout = c.Timeout
}
}
}
// IsEnabled returns true if scanning is enabled
func (c *ScanConfig) IsEnabled() bool {
return c.Enabled
}
// splitAndTrim splits a string by comma and trims whitespace from each element
func splitAndTrim(s string) []string {
parts := strings.Split(s, ",")
result := make([]string, 0, len(parts))
for _, p := range parts {
p = strings.TrimSpace(p)
if p != "" {
result = append(result, p)
}
}
return result
}
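
A minimal usage sketch for this package, roughly what the Ingress and VirtualService reconcilers below do. The annotation values and the target URL are illustrative, not taken from the chart:

```go
package main

import (
	"fmt"

	nucleiv1alpha1 "github.com/mortenolsen/nuclei-operator/api/v1alpha1"
	"github.com/mortenolsen/nuclei-operator/internal/annotations"
)

func main() {
	// Annotations as they might appear on an Ingress or VirtualService.
	// Formats follow the parsing code above: comma-separated lists, a Go
	// duration for the timeout, and a cron expression for the schedule.
	objAnnotations := map[string]string{
		annotations.AnnotationEnabled:   "true",
		annotations.AnnotationTemplates: "cves,exposures",
		annotations.AnnotationSeverity:  "critical,high",
		annotations.AnnotationSchedule:  "0 2 * * 0",
		annotations.AnnotationTimeout:   "15m",
	}

	cfg := annotations.ParseAnnotations(objAnnotations)
	if !cfg.IsEnabled() {
		return // the reconcilers delete any existing NucleiScan in this case
	}

	// Overlay the annotation-derived settings onto a NucleiScan spec.
	spec := nucleiv1alpha1.NucleiScanSpec{Targets: []string{"https://example.com"}}
	cfg.ApplyToNucleiScanSpec(&spec)
	fmt.Println(spec.Templates, spec.Severity, spec.Schedule)
}
```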


@@ -31,6 +31,7 @@ import (
logf "sigs.k8s.io/controller-runtime/pkg/log"
nucleiv1alpha1 "github.com/mortenolsen/nuclei-operator/api/v1alpha1"
"github.com/mortenolsen/nuclei-operator/internal/annotations"
)
// IngressReconciler reconciles Ingress objects and creates NucleiScan resources
@@ -59,12 +60,8 @@ func (r *IngressReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
return ctrl.Result{}, err
}
// Extract target URLs from the Ingress
targets := extractURLsFromIngress(ingress)
if len(targets) == 0 {
log.Info("No targets extracted from Ingress, skipping NucleiScan creation")
return ctrl.Result{}, nil
}
// Parse annotations to get scan configuration
scanConfig := annotations.ParseAnnotations(ingress.Annotations)
// Define the NucleiScan name based on the Ingress name
nucleiScanName := fmt.Sprintf("%s-scan", ingress.Name)
@@ -81,23 +78,48 @@ func (r *IngressReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
return ctrl.Result{}, err
}
// Check if scanning is disabled via annotations
if !scanConfig.IsEnabled() {
// Scanning disabled - delete existing NucleiScan if it exists
if err == nil {
log.Info("Scanning disabled via annotation, deleting existing NucleiScan", "nucleiScan", nucleiScanName)
if err := r.Delete(ctx, existingScan); err != nil && !apierrors.IsNotFound(err) {
log.Error(err, "Failed to delete NucleiScan")
return ctrl.Result{}, err
}
}
return ctrl.Result{}, nil
}
// Extract target URLs from the Ingress
targets := extractURLsFromIngress(ingress)
if len(targets) == 0 {
log.Info("No targets extracted from Ingress, skipping NucleiScan creation")
return ctrl.Result{}, nil
}
if apierrors.IsNotFound(err) {
// Create a new NucleiScan
spec := nucleiv1alpha1.NucleiScanSpec{
SourceRef: nucleiv1alpha1.SourceReference{
APIVersion: "networking.k8s.io/v1",
Kind: "Ingress",
Name: ingress.Name,
Namespace: ingress.Namespace,
UID: string(ingress.UID),
},
Targets: targets,
}
// Apply annotation configuration to the spec
scanConfig.ApplyToNucleiScanSpec(&spec)
nucleiScan := &nucleiv1alpha1.NucleiScan{
ObjectMeta: metav1.ObjectMeta{
Name: nucleiScanName,
Namespace: ingress.Namespace,
},
Spec: nucleiv1alpha1.NucleiScanSpec{
SourceRef: nucleiv1alpha1.SourceReference{
APIVersion: "networking.k8s.io/v1",
Kind: "Ingress",
Name: ingress.Name,
Namespace: ingress.Namespace,
UID: string(ingress.UID),
},
Targets: targets,
},
Spec: spec,
}
// Set owner reference for garbage collection
@@ -115,18 +137,31 @@ func (r *IngressReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
return ctrl.Result{}, nil
}
// NucleiScan exists - check if targets need to be updated
// NucleiScan exists - check if it needs to be updated
needsUpdate := false
// Check if targets changed
if !reflect.DeepEqual(existingScan.Spec.Targets, targets) {
existingScan.Spec.Targets = targets
// Also update the SourceRef UID in case it changed (e.g., Ingress was recreated)
existingScan.Spec.SourceRef.UID = string(ingress.UID)
needsUpdate = true
}
// Also update the SourceRef UID in case it changed (e.g., Ingress was recreated)
if existingScan.Spec.SourceRef.UID != string(ingress.UID) {
existingScan.Spec.SourceRef.UID = string(ingress.UID)
needsUpdate = true
}
// Apply annotation configuration
scanConfig.ApplyToNucleiScanSpec(&existingScan.Spec)
if needsUpdate {
if err := r.Update(ctx, existingScan); err != nil {
log.Error(err, "Failed to update NucleiScan targets")
log.Error(err, "Failed to update NucleiScan")
return ctrl.Result{}, err
}
log.Info("Updated NucleiScan targets for Ingress", "nucleiScan", nucleiScanName, "targets", targets)
log.Info("Updated NucleiScan for Ingress", "nucleiScan", nucleiScanName, "targets", targets)
}
return ctrl.Result{}, nil


@@ -23,16 +23,20 @@ import (
"os"
"time"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
logf "sigs.k8s.io/controller-runtime/pkg/log"
nucleiv1alpha1 "github.com/mortenolsen/nuclei-operator/api/v1alpha1"
"github.com/mortenolsen/nuclei-operator/internal/scanner"
"github.com/mortenolsen/nuclei-operator/internal/jobmanager"
)
const (
@@ -74,66 +78,78 @@ const (
ReasonScanSuspended = "ScanSuspended"
)
// BackoffConfig holds configuration for exponential backoff
type BackoffConfig struct {
Initial time.Duration
Max time.Duration
Multiplier float64
// ReconcilerConfig holds configuration for the NucleiScanReconciler
type ReconcilerConfig struct {
RescanAge time.Duration
BackoffInitial time.Duration
BackoffMax time.Duration
BackoffMultiplier float64
}
// DefaultReconcilerConfig returns a ReconcilerConfig with default values
func DefaultReconcilerConfig() ReconcilerConfig {
config := ReconcilerConfig{
RescanAge: defaultRescanAge,
BackoffInitial: defaultBackoffInitial,
BackoffMax: defaultBackoffMax,
BackoffMultiplier: defaultBackoffMultiplier,
}
// Override from environment variables
if envVal := os.Getenv(envRescanAge); envVal != "" {
if parsed, err := time.ParseDuration(envVal); err == nil {
config.RescanAge = parsed
}
}
if envVal := os.Getenv(envBackoffInitial); envVal != "" {
if parsed, err := time.ParseDuration(envVal); err == nil {
config.BackoffInitial = parsed
}
}
if envVal := os.Getenv(envBackoffMax); envVal != "" {
if parsed, err := time.ParseDuration(envVal); err == nil {
config.BackoffMax = parsed
}
}
if envVal := os.Getenv(envBackoffMultiplier); envVal != "" {
if parsed, err := parseFloat(envVal); err == nil && parsed > 0 {
config.BackoffMultiplier = parsed
}
}
return config
}
// NucleiScanReconciler reconciles a NucleiScan object
type NucleiScanReconciler struct {
client.Client
Scheme *runtime.Scheme
Scanner scanner.Scanner
RescanAge time.Duration
Recorder record.EventRecorder
JobManager *jobmanager.JobManager
Config ReconcilerConfig
HTTPClient *http.Client
Backoff BackoffConfig
}
// NewNucleiScanReconciler creates a new NucleiScanReconciler with default settings
func NewNucleiScanReconciler(c client.Client, scheme *runtime.Scheme, s scanner.Scanner) *NucleiScanReconciler {
rescanAge := defaultRescanAge
if envVal := os.Getenv(envRescanAge); envVal != "" {
if parsed, err := time.ParseDuration(envVal); err == nil {
rescanAge = parsed
}
}
backoffInitial := defaultBackoffInitial
if envVal := os.Getenv(envBackoffInitial); envVal != "" {
if parsed, err := time.ParseDuration(envVal); err == nil {
backoffInitial = parsed
}
}
backoffMax := defaultBackoffMax
if envVal := os.Getenv(envBackoffMax); envVal != "" {
if parsed, err := time.ParseDuration(envVal); err == nil {
backoffMax = parsed
}
}
backoffMultiplier := defaultBackoffMultiplier
if envVal := os.Getenv(envBackoffMultiplier); envVal != "" {
if parsed, err := parseFloat(envVal); err == nil && parsed > 0 {
backoffMultiplier = parsed
}
}
// NewNucleiScanReconciler creates a new NucleiScanReconciler with the given configuration
func NewNucleiScanReconciler(
c client.Client,
scheme *runtime.Scheme,
recorder record.EventRecorder,
jobManager *jobmanager.JobManager,
config ReconcilerConfig,
) *NucleiScanReconciler {
return &NucleiScanReconciler{
Client: c,
Scheme: scheme,
Scanner: s,
RescanAge: rescanAge,
Client: c,
Scheme: scheme,
Recorder: recorder,
JobManager: jobManager,
Config: config,
HTTPClient: &http.Client{
Timeout: 10 * time.Second,
},
Backoff: BackoffConfig{
Initial: backoffInitial,
Max: backoffMax,
Multiplier: backoffMultiplier,
},
}
}
@@ -147,15 +163,15 @@ func parseFloat(s string) (float64, error) {
// calculateBackoff calculates the next backoff duration based on retry count
func (r *NucleiScanReconciler) calculateBackoff(retryCount int) time.Duration {
if retryCount <= 0 {
return r.Backoff.Initial
return r.Config.BackoffInitial
}
// Calculate exponential backoff: initial * multiplier^retryCount
backoff := float64(r.Backoff.Initial)
backoff := float64(r.Config.BackoffInitial)
for i := 0; i < retryCount; i++ {
backoff *= r.Backoff.Multiplier
if backoff > float64(r.Backoff.Max) {
return r.Backoff.Max
backoff *= r.Config.BackoffMultiplier
if backoff > float64(r.Config.BackoffMax) {
return r.Config.BackoffMax
}
}
@@ -166,6 +182,8 @@ func (r *NucleiScanReconciler) calculateBackoff(retryCount int) time.Duration {
// +kubebuilder:rbac:groups=nuclei.homelab.mortenolsen.pro,resources=nucleiscans/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=nuclei.homelab.mortenolsen.pro,resources=nucleiscans/finalizers,verbs=update
// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch
// +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch
// Reconcile is part of the main kubernetes reconciliation loop which aims to
// move the current state of the cluster closer to the desired state.
@@ -215,15 +233,7 @@ func (r *NucleiScanReconciler) Reconcile(ctx context.Context, req ctrl.Request)
case nucleiv1alpha1.ScanPhasePending:
return r.handlePendingPhase(ctx, nucleiScan)
case nucleiv1alpha1.ScanPhaseRunning:
// Running phase on startup means the scan was interrupted (operator restart)
// Reset to Pending to re-run the scan
log.Info("Found stale Running scan, resetting to Pending (operator likely restarted)")
nucleiScan.Status.Phase = nucleiv1alpha1.ScanPhasePending
nucleiScan.Status.LastError = "Scan was interrupted due to operator restart, re-queuing"
if err := r.Status().Update(ctx, nucleiScan); err != nil {
return ctrl.Result{}, err
}
return ctrl.Result{Requeue: true}, nil
return r.handleRunningPhase(ctx, nucleiScan)
case nucleiv1alpha1.ScanPhaseCompleted:
return r.handleCompletedPhase(ctx, nucleiScan)
case nucleiv1alpha1.ScanPhaseFailed:
@@ -245,8 +255,15 @@ func (r *NucleiScanReconciler) handleDeletion(ctx context.Context, nucleiScan *n
if controllerutil.ContainsFinalizer(nucleiScan, finalizerName) {
log.Info("Handling deletion, performing cleanup", "name", nucleiScan.Name, "namespace", nucleiScan.Namespace)
// Perform any cleanup here (e.g., cancel running scans)
// In our synchronous implementation, there's nothing to clean up
// Clean up any running scanner job
if nucleiScan.Status.JobRef != nil {
log.Info("Deleting scanner job", "job", nucleiScan.Status.JobRef.Name)
if err := r.JobManager.DeleteJob(ctx, nucleiScan.Status.JobRef.Name, nucleiScan.Namespace); err != nil {
if !apierrors.IsNotFound(err) {
log.Error(err, "Failed to delete scanner job", "job", nucleiScan.Status.JobRef.Name)
}
}
}
// Remove finalizer
log.Info("Removing finalizer", "finalizer", finalizerName)
@@ -261,56 +278,55 @@ func (r *NucleiScanReconciler) handleDeletion(ctx context.Context, nucleiScan *n
return ctrl.Result{}, nil
}
// handlePendingPhase handles the Pending phase - starts a new scan
// handlePendingPhase handles the Pending phase - creates a scanner job
func (r *NucleiScanReconciler) handlePendingPhase(ctx context.Context, nucleiScan *nucleiv1alpha1.NucleiScan) (ctrl.Result, error) {
log := logf.FromContext(ctx)
log.Info("Preparing to scan", "targets", len(nucleiScan.Spec.Targets))
logger := logf.FromContext(ctx)
logger.Info("Preparing to scan", "targets", len(nucleiScan.Spec.Targets))
// Check if we're at capacity
atCapacity, err := r.JobManager.AtCapacity(ctx)
if err != nil {
logger.Error(err, "Failed to check job capacity")
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}
if atCapacity {
logger.Info("At maximum concurrent scans, requeuing")
r.Recorder.Event(nucleiScan, corev1.EventTypeNormal, "AtCapacity",
"Maximum concurrent scans reached, waiting for capacity")
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}
// Check if at least one target is available before scanning
availableTargets, unavailableTargets := r.checkTargetsAvailability(ctx, nucleiScan.Spec.Targets)
if len(availableTargets) == 0 {
// Calculate backoff based on retry count
retryCount := nucleiScan.Status.RetryCount
backoffDuration := r.calculateBackoff(retryCount)
log.Info("No targets are available yet, waiting with backoff...",
"unavailable", len(unavailableTargets),
"retryCount", retryCount,
"backoffDuration", backoffDuration)
// Update condition and retry count
now := metav1.Now()
meta.SetStatusCondition(&nucleiScan.Status.Conditions, metav1.Condition{
Type: ConditionTypeReady,
Status: metav1.ConditionFalse,
Reason: "WaitingForTargets",
Message: fmt.Sprintf("Waiting for targets to become available (%d unavailable, retry #%d, next check in %v)", len(unavailableTargets), retryCount+1, backoffDuration),
LastTransitionTime: now,
})
nucleiScan.Status.LastError = fmt.Sprintf("Targets not available: %v", unavailableTargets)
nucleiScan.Status.RetryCount = retryCount + 1
nucleiScan.Status.LastRetryTime = &now
if err := r.Status().Update(ctx, nucleiScan); err != nil {
return ctrl.Result{}, err
}
// Requeue with exponential backoff
return ctrl.Result{RequeueAfter: backoffDuration}, nil
return r.handleTargetsUnavailable(ctx, nucleiScan, unavailableTargets)
}
// Reset retry count since targets are now available
if nucleiScan.Status.RetryCount > 0 {
log.Info("Targets now available, resetting retry count", "previousRetries", nucleiScan.Status.RetryCount)
logger.Info("Targets now available, resetting retry count", "previousRetries", nucleiScan.Status.RetryCount)
nucleiScan.Status.RetryCount = 0
nucleiScan.Status.LastRetryTime = nil
}
log.Info("Starting scan", "availableTargets", len(availableTargets), "unavailableTargets", len(unavailableTargets))
logger.Info("Creating scanner job", "availableTargets", len(availableTargets), "unavailableTargets", len(unavailableTargets))
// Update status to Running
// Create the scanner job
job, err := r.JobManager.CreateScanJob(ctx, nucleiScan)
if err != nil {
logger.Error(err, "Failed to create scanner job")
r.Recorder.Event(nucleiScan, corev1.EventTypeWarning, "JobCreationFailed", err.Error())
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}
// Update status to Running with job reference
now := metav1.Now()
nucleiScan.Status.Phase = nucleiv1alpha1.ScanPhaseRunning
nucleiScan.Status.JobRef = &nucleiv1alpha1.JobReference{
Name: job.Name,
UID: string(job.UID),
StartTime: &now,
}
nucleiScan.Status.LastScanTime = &now
nucleiScan.Status.LastError = ""
nucleiScan.Status.ObservedGeneration = nucleiScan.Generation
@@ -320,30 +336,150 @@ func (r *NucleiScanReconciler) handlePendingPhase(ctx context.Context, nucleiSca
Type: ConditionTypeScanActive,
Status: metav1.ConditionTrue,
Reason: ReasonScanRunning,
Message: fmt.Sprintf("Scan is in progress (%d targets)", len(availableTargets)),
Message: fmt.Sprintf("Scanner job %s created for %d targets", job.Name, len(nucleiScan.Spec.Targets)),
LastTransitionTime: now,
})
if err := r.Status().Update(ctx, nucleiScan); err != nil {
logger.Error(err, "Failed to update status")
return ctrl.Result{}, err
}
r.Recorder.Event(nucleiScan, corev1.EventTypeNormal, "ScanJobCreated",
fmt.Sprintf("Created scanner job %s", job.Name))
// Requeue to monitor job status
return ctrl.Result{RequeueAfter: 15 * time.Second}, nil
}
// handleTargetsUnavailable handles the case when no targets are available
func (r *NucleiScanReconciler) handleTargetsUnavailable(ctx context.Context, nucleiScan *nucleiv1alpha1.NucleiScan, unavailableTargets []string) (ctrl.Result, error) {
logger := logf.FromContext(ctx)
// Calculate backoff based on retry count
retryCount := nucleiScan.Status.RetryCount
backoffDuration := r.calculateBackoff(retryCount)
logger.Info("No targets are available yet, waiting with backoff...",
"unavailable", len(unavailableTargets),
"retryCount", retryCount,
"backoffDuration", backoffDuration)
// Update condition and retry count
now := metav1.Now()
meta.SetStatusCondition(&nucleiScan.Status.Conditions, metav1.Condition{
Type: ConditionTypeReady,
Status: metav1.ConditionFalse,
Reason: "WaitingForTargets",
Message: fmt.Sprintf("Waiting for targets to become available (%d unavailable, retry #%d, next check in %v)", len(unavailableTargets), retryCount+1, backoffDuration),
LastTransitionTime: now,
})
nucleiScan.Status.LastError = fmt.Sprintf("Targets not available: %v", unavailableTargets)
nucleiScan.Status.RetryCount = retryCount + 1
nucleiScan.Status.LastRetryTime = &now
if err := r.Status().Update(ctx, nucleiScan); err != nil {
return ctrl.Result{}, err
}
// Build scan options
options := scanner.ScanOptions{
Templates: nucleiScan.Spec.Templates,
Severity: nucleiScan.Spec.Severity,
Timeout: 30 * time.Minute, // Default timeout
// Requeue with exponential backoff
return ctrl.Result{RequeueAfter: backoffDuration}, nil
}
// handleRunningPhase handles the Running phase - monitors the scanner job
func (r *NucleiScanReconciler) handleRunningPhase(ctx context.Context, nucleiScan *nucleiv1alpha1.NucleiScan) (ctrl.Result, error) {
logger := logf.FromContext(ctx)
// Check if we have a job reference
if nucleiScan.Status.JobRef == nil {
logger.Info("No job reference found, resetting to Pending")
nucleiScan.Status.Phase = nucleiv1alpha1.ScanPhasePending
nucleiScan.Status.LastError = "No job reference found, re-queuing scan"
if err := r.Status().Update(ctx, nucleiScan); err != nil {
return ctrl.Result{}, err
}
return ctrl.Result{Requeue: true}, nil
}
// Execute the scan with available targets only
result, err := r.Scanner.Scan(ctx, availableTargets, options)
// Get the job
job, err := r.JobManager.GetJob(ctx, nucleiScan.Status.JobRef.Name, nucleiScan.Namespace)
if err != nil {
log.Error(err, "Scan failed")
return r.handleScanError(ctx, nucleiScan, err)
if apierrors.IsNotFound(err) {
logger.Info("Scanner job not found, resetting to Pending")
nucleiScan.Status.Phase = nucleiv1alpha1.ScanPhasePending
nucleiScan.Status.LastError = "Scanner job not found, re-queuing scan"
nucleiScan.Status.JobRef = nil
if err := r.Status().Update(ctx, nucleiScan); err != nil {
return ctrl.Result{}, err
}
return ctrl.Result{Requeue: true}, nil
}
return ctrl.Result{}, err
}
// Update status with results
return r.handleScanSuccess(ctx, nucleiScan, result)
// Update pod name if available
if nucleiScan.Status.JobRef.PodName == "" {
podName, _ := r.JobManager.GetJobPodName(ctx, job)
if podName != "" {
nucleiScan.Status.JobRef.PodName = podName
if err := r.Status().Update(ctx, nucleiScan); err != nil {
logger.Error(err, "Failed to update pod name")
}
}
}
// Check job status - the scanner pod updates the NucleiScan status directly
// We just need to detect completion/failure for events
if r.JobManager.IsJobSuccessful(job) {
logger.Info("Scanner job completed successfully")
r.Recorder.Event(nucleiScan, corev1.EventTypeNormal, "ScanCompleted",
fmt.Sprintf("Scan completed with %d findings", len(nucleiScan.Status.Findings)))
// Status is already updated by the scanner pod
// Just schedule next scan if needed
return r.scheduleNextScan(ctx, nucleiScan)
}
if r.JobManager.IsJobFailed(job) {
reason := r.JobManager.GetJobFailureReason(job)
logger.Info("Scanner job failed", "reason", reason)
// Update status if not already updated by scanner
if nucleiScan.Status.Phase != nucleiv1alpha1.ScanPhaseFailed {
now := metav1.Now()
nucleiScan.Status.Phase = nucleiv1alpha1.ScanPhaseFailed
nucleiScan.Status.LastError = reason
nucleiScan.Status.CompletionTime = &now
// Set conditions
meta.SetStatusCondition(&nucleiScan.Status.Conditions, metav1.Condition{
Type: ConditionTypeScanActive,
Status: metav1.ConditionFalse,
Reason: ReasonScanFailed,
Message: reason,
LastTransitionTime: now,
})
meta.SetStatusCondition(&nucleiScan.Status.Conditions, metav1.Condition{
Type: ConditionTypeReady,
Status: metav1.ConditionFalse,
Reason: ReasonScanFailed,
Message: reason,
LastTransitionTime: now,
})
if err := r.Status().Update(ctx, nucleiScan); err != nil {
return ctrl.Result{}, err
}
}
r.Recorder.Event(nucleiScan, corev1.EventTypeWarning, "ScanFailed", reason)
return ctrl.Result{}, nil
}
// Job still running, requeue to check again
logger.V(1).Info("Scanner job still running", "job", job.Name)
return ctrl.Result{RequeueAfter: 15 * time.Second}, nil
}
// checkTargetsAvailability checks which targets are reachable
@@ -373,80 +509,6 @@ func (r *NucleiScanReconciler) checkTargetsAvailability(ctx context.Context, tar
return available, unavailable
}
// handleScanSuccess updates the status after a successful scan
func (r *NucleiScanReconciler) handleScanSuccess(ctx context.Context, nucleiScan *nucleiv1alpha1.NucleiScan, result *scanner.ScanResult) (ctrl.Result, error) {
log := logf.FromContext(ctx)
log.Info("Scan completed successfully", "findings", len(result.Findings), "duration", result.Duration)
now := metav1.Now()
nucleiScan.Status.Phase = nucleiv1alpha1.ScanPhaseCompleted
nucleiScan.Status.CompletionTime = &now
nucleiScan.Status.Findings = result.Findings
nucleiScan.Status.Summary = &result.Summary
nucleiScan.Status.LastError = ""
// Set conditions
meta.SetStatusCondition(&nucleiScan.Status.Conditions, metav1.Condition{
Type: ConditionTypeScanActive,
Status: metav1.ConditionFalse,
Reason: ReasonScanCompleted,
Message: "Scan completed successfully",
LastTransitionTime: now,
})
message := fmt.Sprintf("Scan completed with %d findings", len(result.Findings))
meta.SetStatusCondition(&nucleiScan.Status.Conditions, metav1.Condition{
Type: ConditionTypeReady,
Status: metav1.ConditionTrue,
Reason: ReasonScanCompleted,
Message: message,
LastTransitionTime: now,
})
if err := r.Status().Update(ctx, nucleiScan); err != nil {
return ctrl.Result{}, err
}
// If there's a schedule, calculate next scan time
if nucleiScan.Spec.Schedule != "" {
return r.scheduleNextScan(ctx, nucleiScan)
}
return ctrl.Result{}, nil
}
// handleScanError updates the status after a failed scan
func (r *NucleiScanReconciler) handleScanError(ctx context.Context, nucleiScan *nucleiv1alpha1.NucleiScan, scanErr error) (ctrl.Result, error) {
now := metav1.Now()
nucleiScan.Status.Phase = nucleiv1alpha1.ScanPhaseFailed
nucleiScan.Status.CompletionTime = &now
nucleiScan.Status.LastError = scanErr.Error()
// Set conditions
meta.SetStatusCondition(&nucleiScan.Status.Conditions, metav1.Condition{
Type: ConditionTypeScanActive,
Status: metav1.ConditionFalse,
Reason: ReasonScanFailed,
Message: scanErr.Error(),
LastTransitionTime: now,
})
meta.SetStatusCondition(&nucleiScan.Status.Conditions, metav1.Condition{
Type: ConditionTypeReady,
Status: metav1.ConditionFalse,
Reason: ReasonScanFailed,
Message: scanErr.Error(),
LastTransitionTime: now,
})
if err := r.Status().Update(ctx, nucleiScan); err != nil {
return ctrl.Result{}, err
}
// Requeue with backoff for retry
return ctrl.Result{RequeueAfter: defaultErrorRequeueAfter}, nil
}
// handleCompletedPhase handles the Completed phase - checks for scheduled rescans
func (r *NucleiScanReconciler) handleCompletedPhase(ctx context.Context, nucleiScan *nucleiv1alpha1.NucleiScan) (ctrl.Result, error) {
log := logf.FromContext(ctx)
@@ -455,6 +517,7 @@ func (r *NucleiScanReconciler) handleCompletedPhase(ctx context.Context, nucleiS
if nucleiScan.Generation != nucleiScan.Status.ObservedGeneration {
log.Info("Spec changed, triggering new scan")
nucleiScan.Status.Phase = nucleiv1alpha1.ScanPhasePending
nucleiScan.Status.JobRef = nil // Clear old job reference
if err := r.Status().Update(ctx, nucleiScan); err != nil {
return ctrl.Result{}, err
}
@@ -469,9 +532,10 @@ func (r *NucleiScanReconciler) handleCompletedPhase(ctx context.Context, nucleiS
// Check if scan results are stale (older than RescanAge)
if nucleiScan.Status.CompletionTime != nil {
age := time.Since(nucleiScan.Status.CompletionTime.Time)
if age > r.RescanAge {
log.Info("Scan results are stale, triggering rescan", "age", age, "maxAge", r.RescanAge)
if age > r.Config.RescanAge {
log.Info("Scan results are stale, triggering rescan", "age", age, "maxAge", r.Config.RescanAge)
nucleiScan.Status.Phase = nucleiv1alpha1.ScanPhasePending
nucleiScan.Status.JobRef = nil // Clear old job reference
nucleiScan.Status.LastError = fmt.Sprintf("Automatic rescan triggered (results were %v old)", age.Round(time.Hour))
if err := r.Status().Update(ctx, nucleiScan); err != nil {
return ctrl.Result{}, err
@@ -480,7 +544,7 @@ func (r *NucleiScanReconciler) handleCompletedPhase(ctx context.Context, nucleiS
}
// Schedule a requeue for when the results will become stale
timeUntilStale := r.RescanAge - age
timeUntilStale := r.Config.RescanAge - age
log.V(1).Info("Scan results still fresh, will check again later", "timeUntilStale", timeUntilStale)
return ctrl.Result{RequeueAfter: timeUntilStale}, nil
}
@@ -496,6 +560,7 @@ func (r *NucleiScanReconciler) handleFailedPhase(ctx context.Context, nucleiScan
if nucleiScan.Generation != nucleiScan.Status.ObservedGeneration {
log.Info("Spec changed, triggering new scan")
nucleiScan.Status.Phase = nucleiv1alpha1.ScanPhasePending
nucleiScan.Status.JobRef = nil // Clear old job reference
if err := r.Status().Update(ctx, nucleiScan); err != nil {
return ctrl.Result{}, err
}
@@ -512,6 +577,11 @@ func (r *NucleiScanReconciler) handleFailedPhase(ctx context.Context, nucleiScan
func (r *NucleiScanReconciler) scheduleNextScan(ctx context.Context, nucleiScan *nucleiv1alpha1.NucleiScan) (ctrl.Result, error) {
log := logf.FromContext(ctx)
// If there's no schedule, nothing to do
if nucleiScan.Spec.Schedule == "" {
return ctrl.Result{}, nil
}
// Parse cron schedule
nextTime, err := getNextScheduleTime(nucleiScan.Spec.Schedule, time.Now())
if err != nil {
@@ -550,6 +620,7 @@ func (r *NucleiScanReconciler) checkScheduledScan(ctx context.Context, nucleiSca
log.Info("Scheduled scan time reached, triggering scan")
nucleiScan.Status.Phase = nucleiv1alpha1.ScanPhasePending
nucleiScan.Status.NextScheduledTime = nil
nucleiScan.Status.JobRef = nil // Clear old job reference
if err := r.Status().Update(ctx, nucleiScan); err != nil {
return ctrl.Result{}, err
}
@@ -602,6 +673,7 @@ func getNextScheduleTime(schedule string, from time.Time) (time.Time, error) {
func (r *NucleiScanReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&nucleiv1alpha1.NucleiScan{}).
Owns(&batchv1.Job{}). // Watch Jobs owned by NucleiScan
Named("nucleiscan").
Complete(r)
}
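
The new constructor signature changes how the reconciler is wired up at startup. A sketch of what that wiring might look like, assuming the usual kubebuilder layout (`internal/controller`, package `controller`) and a controller-runtime manager `mgr` created in cmd/main.go, which is not part of this excerpt:

```go
package setup

import (
	ctrl "sigs.k8s.io/controller-runtime"

	"github.com/mortenolsen/nuclei-operator/internal/controller"
	"github.com/mortenolsen/nuclei-operator/internal/jobmanager"
)

// setupReconcilers wires the Job Manager and the NucleiScan reconciler
// together. The "internal/controller" package path is assumed.
func setupReconcilers(mgr ctrl.Manager) error {
	jm := jobmanager.NewJobManager(mgr.GetClient(), mgr.GetScheme(), jobmanager.DefaultConfig())

	r := controller.NewNucleiScanReconciler(
		mgr.GetClient(),
		mgr.GetScheme(),
		mgr.GetEventRecorderFor("nucleiscan-controller"),
		jm,
		controller.DefaultReconcilerConfig(),
	)
	return r.SetupWithManager(mgr)
}
```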


@@ -33,6 +33,7 @@ import (
istionetworkingv1beta1 "istio.io/client-go/pkg/apis/networking/v1beta1"
nucleiv1alpha1 "github.com/mortenolsen/nuclei-operator/api/v1alpha1"
"github.com/mortenolsen/nuclei-operator/internal/annotations"
)
// VirtualServiceReconciler reconciles VirtualService objects and creates NucleiScan resources
@@ -61,12 +62,8 @@ func (r *VirtualServiceReconciler) Reconcile(ctx context.Context, req ctrl.Reque
return ctrl.Result{}, err
}
// Extract target URLs from the VirtualService
targets := extractURLsFromVirtualService(virtualService)
if len(targets) == 0 {
log.Info("No targets extracted from VirtualService, skipping NucleiScan creation")
return ctrl.Result{}, nil
}
// Parse annotations to get scan configuration
scanConfig := annotations.ParseAnnotations(virtualService.Annotations)
// Define the NucleiScan name based on the VirtualService name
nucleiScanName := fmt.Sprintf("%s-scan", virtualService.Name)
@@ -83,23 +80,48 @@ func (r *VirtualServiceReconciler) Reconcile(ctx context.Context, req ctrl.Reque
return ctrl.Result{}, err
}
// Check if scanning is disabled via annotations
if !scanConfig.IsEnabled() {
// Scanning disabled - delete existing NucleiScan if it exists
if err == nil {
log.Info("Scanning disabled via annotation, deleting existing NucleiScan", "nucleiScan", nucleiScanName)
if err := r.Delete(ctx, existingScan); err != nil && !apierrors.IsNotFound(err) {
log.Error(err, "Failed to delete NucleiScan")
return ctrl.Result{}, err
}
}
return ctrl.Result{}, nil
}
// Extract target URLs from the VirtualService
targets := extractURLsFromVirtualService(virtualService)
if len(targets) == 0 {
log.Info("No targets extracted from VirtualService, skipping NucleiScan creation")
return ctrl.Result{}, nil
}
if apierrors.IsNotFound(err) {
// Create a new NucleiScan
spec := nucleiv1alpha1.NucleiScanSpec{
SourceRef: nucleiv1alpha1.SourceReference{
APIVersion: "networking.istio.io/v1beta1",
Kind: "VirtualService",
Name: virtualService.Name,
Namespace: virtualService.Namespace,
UID: string(virtualService.UID),
},
Targets: targets,
}
// Apply annotation configuration to the spec
scanConfig.ApplyToNucleiScanSpec(&spec)
nucleiScan := &nucleiv1alpha1.NucleiScan{
ObjectMeta: metav1.ObjectMeta{
Name: nucleiScanName,
Namespace: virtualService.Namespace,
},
Spec: nucleiv1alpha1.NucleiScanSpec{
SourceRef: nucleiv1alpha1.SourceReference{
APIVersion: "networking.istio.io/v1beta1",
Kind: "VirtualService",
Name: virtualService.Name,
Namespace: virtualService.Namespace,
UID: string(virtualService.UID),
},
Targets: targets,
},
Spec: spec,
}
// Set owner reference for garbage collection
@@ -117,18 +139,31 @@ func (r *VirtualServiceReconciler) Reconcile(ctx context.Context, req ctrl.Reque
return ctrl.Result{}, nil
}
// NucleiScan exists - check if targets need to be updated
// NucleiScan exists - check if it needs to be updated
needsUpdate := false
// Check if targets changed
if !reflect.DeepEqual(existingScan.Spec.Targets, targets) {
existingScan.Spec.Targets = targets
// Also update the SourceRef UID in case it changed (e.g., VirtualService was recreated)
existingScan.Spec.SourceRef.UID = string(virtualService.UID)
needsUpdate = true
}
// Also update the SourceRef UID in case it changed (e.g., VirtualService was recreated)
if existingScan.Spec.SourceRef.UID != string(virtualService.UID) {
existingScan.Spec.SourceRef.UID = string(virtualService.UID)
needsUpdate = true
}
// Apply annotation configuration
scanConfig.ApplyToNucleiScanSpec(&existingScan.Spec)
if needsUpdate {
if err := r.Update(ctx, existingScan); err != nil {
log.Error(err, "Failed to update NucleiScan targets")
log.Error(err, "Failed to update NucleiScan")
return ctrl.Result{}, err
}
log.Info("Updated NucleiScan targets for VirtualService", "nucleiScan", nucleiScanName, "targets", targets)
log.Info("Updated NucleiScan for VirtualService", "nucleiScan", nucleiScanName, "targets", targets)
}
return ctrl.Result{}, nil


@@ -0,0 +1,427 @@
/*
Copyright 2024.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package jobmanager
import (
"context"
"fmt"
"time"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/log"
nucleiv1alpha1 "github.com/mortenolsen/nuclei-operator/api/v1alpha1"
)
const (
// DefaultScannerImage is the default image used for scanner pods
DefaultScannerImage = "ghcr.io/morten-olsen/nuclei-operator:latest"
// DefaultTimeout is the default scan timeout
DefaultTimeout = 30 * time.Minute
// DefaultTTLAfterFinished is the default TTL for completed jobs
DefaultTTLAfterFinished = 3600 // 1 hour
// DefaultBackoffLimit is the default number of retries for failed jobs
DefaultBackoffLimit = 2
// LabelManagedBy is the label key for identifying managed resources
LabelManagedBy = "app.kubernetes.io/managed-by"
// LabelComponent is the label key for component identification
LabelComponent = "app.kubernetes.io/component"
// LabelScanName is the label key for the scan name
LabelScanName = "nuclei.homelab.mortenolsen.pro/scan-name"
// LabelScanNamespace is the label key for the scan namespace
LabelScanNamespace = "nuclei.homelab.mortenolsen.pro/scan-namespace"
)
// Config holds the configuration for the JobManager
type Config struct {
// ScannerImage is the default image to use for scanner pods
ScannerImage string
// DefaultTimeout is the default scan timeout
DefaultTimeout time.Duration
// TTLAfterFinished is the TTL for completed jobs in seconds
TTLAfterFinished int32
// BackoffLimit is the number of retries for failed jobs
BackoffLimit int32
// MaxConcurrent is the maximum number of concurrent scan jobs
MaxConcurrent int
// ServiceAccountName is the service account to use for scanner pods
ServiceAccountName string
// DefaultResources are the default resource requirements for scanner pods
DefaultResources corev1.ResourceRequirements
// DefaultTemplates are the default templates to use for scans
DefaultTemplates []string
// DefaultSeverity is the default severity filter
DefaultSeverity []string
}
// DefaultConfig returns a Config with default values
func DefaultConfig() Config {
return Config{
ScannerImage: DefaultScannerImage,
DefaultTimeout: DefaultTimeout,
TTLAfterFinished: DefaultTTLAfterFinished,
BackoffLimit: DefaultBackoffLimit,
MaxConcurrent: 5,
ServiceAccountName: "nuclei-scanner",
DefaultResources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("100m"),
corev1.ResourceMemory: resource.MustParse("256Mi"),
},
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("1"),
corev1.ResourceMemory: resource.MustParse("1Gi"),
},
},
}
}
// JobManager manages scanner jobs for NucleiScan resources
type JobManager struct {
client.Client
Scheme *runtime.Scheme
Config Config
}
// NewJobManager creates a new JobManager with the given configuration
func NewJobManager(c client.Client, scheme *runtime.Scheme, config Config) *JobManager {
return &JobManager{
Client: c,
Scheme: scheme,
Config: config,
}
}
// CreateScanJob creates a new scanner job for the given NucleiScan
func (m *JobManager) CreateScanJob(ctx context.Context, scan *nucleiv1alpha1.NucleiScan) (*batchv1.Job, error) {
logger := log.FromContext(ctx)
job := m.buildJob(scan)
// Set owner reference so the job is garbage collected when the scan is deleted
if err := controllerutil.SetControllerReference(scan, job, m.Scheme); err != nil {
return nil, fmt.Errorf("failed to set controller reference: %w", err)
}
logger.Info("Creating scanner job",
"job", job.Name,
"namespace", job.Namespace,
"image", job.Spec.Template.Spec.Containers[0].Image,
"targets", len(scan.Spec.Targets))
if err := m.Create(ctx, job); err != nil {
return nil, fmt.Errorf("failed to create job: %w", err)
}
return job, nil
}
// GetJob retrieves a job by name and namespace
func (m *JobManager) GetJob(ctx context.Context, name, namespace string) (*batchv1.Job, error) {
job := &batchv1.Job{}
err := m.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, job)
if err != nil {
return nil, err
}
return job, nil
}
// DeleteJob deletes a job by name and namespace
func (m *JobManager) DeleteJob(ctx context.Context, name, namespace string) error {
job := &batchv1.Job{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: namespace,
},
}
return m.Delete(ctx, job, client.PropagationPolicy(metav1.DeletePropagationBackground))
}
// GetJobPodName returns the name of the pod created by the job
func (m *JobManager) GetJobPodName(ctx context.Context, job *batchv1.Job) (string, error) {
podList := &corev1.PodList{}
err := m.List(ctx, podList,
client.InNamespace(job.Namespace),
client.MatchingLabels{"job-name": job.Name})
if err != nil {
return "", err
}
if len(podList.Items) == 0 {
return "", nil
}
// Return the first pod (there should only be one for our jobs)
return podList.Items[0].Name, nil
}
// IsJobComplete returns true if the job has completed (successfully or failed)
func (m *JobManager) IsJobComplete(job *batchv1.Job) bool {
for _, condition := range job.Status.Conditions {
if (condition.Type == batchv1.JobComplete || condition.Type == batchv1.JobFailed) &&
condition.Status == corev1.ConditionTrue {
return true
}
}
return false
}
// IsJobSuccessful returns true if the job completed successfully
func (m *JobManager) IsJobSuccessful(job *batchv1.Job) bool {
for _, condition := range job.Status.Conditions {
if condition.Type == batchv1.JobComplete && condition.Status == corev1.ConditionTrue {
return true
}
}
return false
}
// IsJobFailed returns true if the job failed
func (m *JobManager) IsJobFailed(job *batchv1.Job) bool {
for _, condition := range job.Status.Conditions {
if condition.Type == batchv1.JobFailed && condition.Status == corev1.ConditionTrue {
return true
}
}
return false
}
// GetJobFailureReason returns the reason for job failure
func (m *JobManager) GetJobFailureReason(job *batchv1.Job) string {
for _, condition := range job.Status.Conditions {
if condition.Type == batchv1.JobFailed && condition.Status == corev1.ConditionTrue {
return condition.Message
}
}
return "Unknown failure reason"
}
// CountActiveJobs returns the number of currently active scan jobs
func (m *JobManager) CountActiveJobs(ctx context.Context) (int, error) {
jobList := &batchv1.JobList{}
err := m.List(ctx, jobList, client.MatchingLabels{
LabelManagedBy: "nuclei-operator",
LabelComponent: "scanner",
})
if err != nil {
return 0, err
}
count := 0
for _, job := range jobList.Items {
if job.Status.Active > 0 {
count++
}
}
return count, nil
}
// AtCapacity returns true if the maximum number of concurrent jobs has been reached
func (m *JobManager) AtCapacity(ctx context.Context) (bool, error) {
count, err := m.CountActiveJobs(ctx)
if err != nil {
return false, err
}
return count >= m.Config.MaxConcurrent, nil
}
// CleanupOrphanedJobs removes jobs that no longer have an associated NucleiScan
func (m *JobManager) CleanupOrphanedJobs(ctx context.Context) error {
logger := log.FromContext(ctx)
jobList := &batchv1.JobList{}
err := m.List(ctx, jobList, client.MatchingLabels{
LabelManagedBy: "nuclei-operator",
LabelComponent: "scanner",
})
if err != nil {
return err
}
for _, job := range jobList.Items {
// Check if owner reference exists and the owner still exists
ownerRef := metav1.GetControllerOf(&job)
if ownerRef == nil {
logger.Info("Deleting orphaned job without owner", "job", job.Name, "namespace", job.Namespace)
if err := m.DeleteJob(ctx, job.Name, job.Namespace); err != nil && !apierrors.IsNotFound(err) {
logger.Error(err, "Failed to delete orphaned job", "job", job.Name)
}
continue
}
// Check if the job is stuck (running longer than 2x the timeout)
if job.Status.StartTime != nil {
maxDuration := 2 * m.Config.DefaultTimeout
if time.Since(job.Status.StartTime.Time) > maxDuration && job.Status.Active > 0 {
logger.Info("Deleting stuck job", "job", job.Name, "namespace", job.Namespace,
"age", time.Since(job.Status.StartTime.Time))
if err := m.DeleteJob(ctx, job.Name, job.Namespace); err != nil && !apierrors.IsNotFound(err) {
logger.Error(err, "Failed to delete stuck job", "job", job.Name)
}
}
}
}
return nil
}
// buildJob creates a Job specification for the given NucleiScan
func (m *JobManager) buildJob(scan *nucleiv1alpha1.NucleiScan) *batchv1.Job {
// Generate a unique job name
jobName := fmt.Sprintf("nucleiscan-%s-%d", scan.Name, time.Now().Unix())
if len(jobName) > 63 {
jobName = jobName[:63]
}
// Determine the scanner image
image := m.Config.ScannerImage
if scan.Spec.ScannerConfig != nil && scan.Spec.ScannerConfig.Image != "" {
image = scan.Spec.ScannerConfig.Image
}
// Determine timeout
timeout := m.Config.DefaultTimeout
if scan.Spec.ScannerConfig != nil && scan.Spec.ScannerConfig.Timeout != nil {
timeout = scan.Spec.ScannerConfig.Timeout.Duration
}
activeDeadlineSeconds := int64(timeout.Seconds())
// Determine resources
resources := m.Config.DefaultResources
if scan.Spec.ScannerConfig != nil && scan.Spec.ScannerConfig.Resources != nil {
resources = *scan.Spec.ScannerConfig.Resources
}
// Build command arguments for scanner mode
args := []string{
"--mode=scanner",
fmt.Sprintf("--scan-name=%s", scan.Name),
fmt.Sprintf("--scan-namespace=%s", scan.Namespace),
}
// Build labels
labels := map[string]string{
LabelManagedBy: "nuclei-operator",
LabelComponent: "scanner",
LabelScanName: scan.Name,
LabelScanNamespace: scan.Namespace,
}
// Build node selector
var nodeSelector map[string]string
if scan.Spec.ScannerConfig != nil && scan.Spec.ScannerConfig.NodeSelector != nil {
nodeSelector = scan.Spec.ScannerConfig.NodeSelector
}
// Build tolerations
var tolerations []corev1.Toleration
if scan.Spec.ScannerConfig != nil && scan.Spec.ScannerConfig.Tolerations != nil {
tolerations = scan.Spec.ScannerConfig.Tolerations
}
job := &batchv1.Job{
ObjectMeta: metav1.ObjectMeta{
Name: jobName,
Namespace: scan.Namespace,
Labels: labels,
},
Spec: batchv1.JobSpec{
TTLSecondsAfterFinished: ptr.To(m.Config.TTLAfterFinished),
BackoffLimit: ptr.To(m.Config.BackoffLimit),
ActiveDeadlineSeconds: &activeDeadlineSeconds,
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: labels,
},
Spec: corev1.PodSpec{
RestartPolicy: corev1.RestartPolicyNever,
ServiceAccountName: m.Config.ServiceAccountName,
NodeSelector: nodeSelector,
Tolerations: tolerations,
SecurityContext: &corev1.PodSecurityContext{
RunAsNonRoot: ptr.To(true),
RunAsUser: ptr.To(int64(65532)),
RunAsGroup: ptr.To(int64(65532)),
FSGroup: ptr.To(int64(65532)),
SeccompProfile: &corev1.SeccompProfile{
Type: corev1.SeccompProfileTypeRuntimeDefault,
},
},
Containers: []corev1.Container{
{
Name: "scanner",
Image: image,
Args: args,
Resources: resources,
SecurityContext: &corev1.SecurityContext{
AllowPrivilegeEscalation: ptr.To(false),
ReadOnlyRootFilesystem: ptr.To(false), // Nuclei needs temp files
Capabilities: &corev1.Capabilities{
Drop: []corev1.Capability{"ALL"},
},
},
Env: []corev1.EnvVar{
{
Name: "POD_NAME",
ValueFrom: &corev1.EnvVarSource{
FieldRef: &corev1.ObjectFieldSelector{
FieldPath: "metadata.name",
},
},
},
{
Name: "POD_NAMESPACE",
ValueFrom: &corev1.EnvVarSource{
FieldRef: &corev1.ObjectFieldSelector{
FieldPath: "metadata.namespace",
},
},
},
},
},
},
},
},
},
}
return job
}
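
The commit message lists "periodic cleanup of stuck jobs"; CleanupOrphanedJobs above is the primitive for it. A sketch of how the operator might drive it on a timer — the 10-minute interval and the goroutine placement are assumptions, not shown in this excerpt:

```go
package setup

import (
	"context"
	"time"

	"sigs.k8s.io/controller-runtime/pkg/log"

	"github.com/mortenolsen/nuclei-operator/internal/jobmanager"
)

// startJobCleanup runs CleanupOrphanedJobs on a fixed interval until the
// context is cancelled. The interval here is illustrative.
func startJobCleanup(ctx context.Context, jm *jobmanager.JobManager) {
	go func() {
		ticker := time.NewTicker(10 * time.Minute)
		defer ticker.Stop()
		for {
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
				if err := jm.CleanupOrphanedJobs(ctx); err != nil {
					log.FromContext(ctx).Error(err, "periodic job cleanup failed")
				}
			}
		}
	}()
}
```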


@@ -0,0 +1,117 @@
/*
Copyright 2024.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package jobmanager
import (
"testing"
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
nucleiv1alpha1 "github.com/mortenolsen/nuclei-operator/api/v1alpha1"
)
func TestBuildJob(t *testing.T) {
config := DefaultConfig()
manager := &JobManager{
Config: config,
}
scan := &nucleiv1alpha1.NucleiScan{
ObjectMeta: metav1.ObjectMeta{
Name: "test-scan",
Namespace: "default",
},
Spec: nucleiv1alpha1.NucleiScanSpec{
Targets: []string{"https://example.com"},
},
}
job := manager.buildJob(scan)
// Verify job name prefix
if len(job.Name) == 0 {
t.Error("Job name should not be empty")
}
// Verify namespace
if job.Namespace != "default" {
t.Errorf("Expected namespace 'default', got '%s'", job.Namespace)
}
// Verify labels
if job.Labels[LabelManagedBy] != "nuclei-operator" {
t.Error("Job should have managed-by label")
}
if job.Labels[LabelComponent] != "scanner" {
t.Error("Job should have component label")
}
// Verify container
if len(job.Spec.Template.Spec.Containers) != 1 {
t.Error("Job should have exactly one container")
}
container := job.Spec.Template.Spec.Containers[0]
if container.Image != config.ScannerImage {
t.Errorf("Expected image '%s', got '%s'", config.ScannerImage, container.Image)
}
// Verify security context
if job.Spec.Template.Spec.SecurityContext.RunAsNonRoot == nil || !*job.Spec.Template.Spec.SecurityContext.RunAsNonRoot {
t.Error("Pod should run as non-root")
}
}
func TestBuildJobWithCustomConfig(t *testing.T) {
config := DefaultConfig()
manager := &JobManager{
Config: config,
}
customImage := "custom/scanner:v1"
customTimeout := metav1.Duration{Duration: 45 * time.Minute}
scan := &nucleiv1alpha1.NucleiScan{
ObjectMeta: metav1.ObjectMeta{
Name: "test-scan",
Namespace: "default",
},
Spec: nucleiv1alpha1.NucleiScanSpec{
Targets: []string{"https://example.com"},
ScannerConfig: &nucleiv1alpha1.ScannerConfig{
Image: customImage,
Timeout: &customTimeout,
},
},
}
job := manager.buildJob(scan)
// Verify custom image
container := job.Spec.Template.Spec.Containers[0]
if container.Image != customImage {
t.Errorf("Expected custom image '%s', got '%s'", customImage, container.Image)
}
// Verify custom timeout
expectedDeadline := int64(45 * 60) // 45 minutes in seconds
if *job.Spec.ActiveDeadlineSeconds != expectedDeadline {
t.Errorf("Expected deadline %d, got %d", expectedDeadline, *job.Spec.ActiveDeadlineSeconds)
}
}

internal/scanner/runner.go (new file, +217 lines)

@@ -0,0 +1,217 @@
/*
Copyright 2024.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scanner
import (
"context"
"fmt"
"os"
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/rest"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
nucleiv1alpha1 "github.com/mortenolsen/nuclei-operator/api/v1alpha1"
)
// RunnerConfig holds configuration for the scanner runner
type RunnerConfig struct {
// ScanName is the name of the NucleiScan to execute
ScanName string
// ScanNamespace is the namespace of the NucleiScan
ScanNamespace string
// NucleiBinaryPath is the path to the nuclei binary
NucleiBinaryPath string
// TemplatesPath is the path to nuclei templates
TemplatesPath string
}
// Runner executes a single scan and updates the NucleiScan status
type Runner struct {
config RunnerConfig
client client.Client
scanner Scanner
}
// NewRunner creates a new scanner runner
func NewRunner(config RunnerConfig) (*Runner, error) {
// Set up logging
log.SetLogger(zap.New(zap.UseDevMode(false)))
logger := log.Log.WithName("scanner-runner")
// Create scheme
scheme := runtime.NewScheme()
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
utilruntime.Must(nucleiv1alpha1.AddToScheme(scheme))
// Get in-cluster config
restConfig, err := rest.InClusterConfig()
if err != nil {
return nil, fmt.Errorf("failed to get in-cluster config: %w", err)
}
// Create client
k8sClient, err := client.New(restConfig, client.Options{Scheme: scheme})
if err != nil {
return nil, fmt.Errorf("failed to create client: %w", err)
}
// Create scanner with configuration
scannerConfig := Config{
NucleiBinaryPath: config.NucleiBinaryPath,
TemplatesPath: config.TemplatesPath,
}
// Use defaults if not specified
if scannerConfig.NucleiBinaryPath == "" {
scannerConfig.NucleiBinaryPath = "nuclei"
}
nucleiScanner := NewNucleiScanner(scannerConfig)
logger.Info("Scanner runner initialized",
"scanName", config.ScanName,
"scanNamespace", config.ScanNamespace)
return &Runner{
config: config,
client: k8sClient,
scanner: nucleiScanner,
}, nil
}
// Run executes the scan and updates the NucleiScan status
func (r *Runner) Run(ctx context.Context) error {
logger := log.FromContext(ctx).WithName("scanner-runner")
// Fetch the NucleiScan
scan := &nucleiv1alpha1.NucleiScan{}
err := r.client.Get(ctx, types.NamespacedName{
Name: r.config.ScanName,
Namespace: r.config.ScanNamespace,
}, scan)
if err != nil {
return fmt.Errorf("failed to get NucleiScan: %w", err)
}
logger.Info("Starting scan",
"targets", len(scan.Spec.Targets),
"templates", scan.Spec.Templates,
"severity", scan.Spec.Severity)
// Update status to indicate scan has started
startTime := metav1.Now()
scan.Status.ScanStartTime = &startTime
if err := r.client.Status().Update(ctx, scan); err != nil {
logger.Error(err, "Failed to update scan start time")
// Continue anyway - this is not critical
}
// Build scan options
options := ScanOptions{
Templates: scan.Spec.Templates,
Severity: scan.Spec.Severity,
}
// Execute the scan
scanStartTime := time.Now()
result, err := r.scanner.Scan(ctx, scan.Spec.Targets, options)
scanDuration := time.Since(scanStartTime)
// Re-fetch the scan to avoid conflicts
if fetchErr := r.client.Get(ctx, types.NamespacedName{
Name: r.config.ScanName,
Namespace: r.config.ScanNamespace,
}, scan); fetchErr != nil {
return fmt.Errorf("failed to re-fetch NucleiScan: %w", fetchErr)
}
// Update status based on result
completionTime := metav1.Now()
scan.Status.CompletionTime = &completionTime
if err != nil {
logger.Error(err, "Scan failed")
scan.Status.Phase = nucleiv1alpha1.ScanPhaseFailed
scan.Status.LastError = err.Error()
} else {
logger.Info("Scan completed successfully",
"findings", len(result.Findings),
"duration", scanDuration)
scan.Status.Phase = nucleiv1alpha1.ScanPhaseCompleted
scan.Status.Findings = result.Findings
scan.Status.Summary = &nucleiv1alpha1.ScanSummary{
TotalFindings: len(result.Findings),
FindingsBySeverity: countFindingsBySeverity(result.Findings),
TargetsScanned: len(scan.Spec.Targets),
DurationSeconds: int64(scanDuration.Seconds()),
}
scan.Status.LastError = ""
}
// Update the status
if err := r.client.Status().Update(ctx, scan); err != nil {
return fmt.Errorf("failed to update NucleiScan status: %w", err)
}
logger.Info("Scan status updated",
"phase", scan.Status.Phase,
"findings", len(scan.Status.Findings))
return nil
}
// countFindingsBySeverity counts findings by severity level
func countFindingsBySeverity(findings []nucleiv1alpha1.Finding) map[string]int {
counts := make(map[string]int)
for _, f := range findings {
counts[f.Severity]++
}
return counts
}
// RunScannerMode is the entry point for scanner mode
func RunScannerMode(scanName, scanNamespace string) error {
// Get configuration from environment
config := RunnerConfig{
ScanName: scanName,
ScanNamespace: scanNamespace,
NucleiBinaryPath: os.Getenv("NUCLEI_BINARY_PATH"),
TemplatesPath: os.Getenv("NUCLEI_TEMPLATES_PATH"),
}
if config.ScanName == "" || config.ScanNamespace == "" {
return fmt.Errorf("scan name and namespace are required")
}
runner, err := NewRunner(config)
if err != nil {
return fmt.Errorf("failed to create runner: %w", err)
}
ctx := context.Background()
return runner.Run(ctx)
}