feat: implement pod-based scanning architecture

This major refactor moves from synchronous subprocess-based scanning to
asynchronous pod-based scanning using Kubernetes Jobs.

## Architecture Changes
- Scanner jobs are now Kubernetes Jobs with ttlSecondsAfterFinished set for automatic cleanup
- Jobs have owner references for garbage collection when NucleiScan is deleted
- Configurable concurrency limits, timeouts, and resource requirements

## New Features
- Dual-mode binary: --mode=controller (default) or --mode=scanner
- Annotation-based configuration for Ingress/VirtualService resources
- Operator-level configuration via environment variables
- Startup recovery for orphaned scans after operator restart
- Periodic cleanup of stuck jobs

## New Files
- DESIGN.md: Comprehensive architecture design document
- internal/jobmanager/: Job Manager for creating/monitoring scanner jobs
- internal/scanner/runner.go: Scanner mode implementation
- internal/annotations/: Annotation parsing utilities
- charts/nuclei-operator/templates/scanner-rbac.yaml: Scanner RBAC

## API Changes
- Added ScannerConfig struct for per-scan scanner configuration
- Added JobReference struct for tracking scanner jobs
- Added ScannerConfig field to NucleiScanSpec
- Added JobRef and ScanStartTime fields to NucleiScanStatus

## Supported Annotations
- nuclei.homelab.mortenolsen.pro/enabled
- nuclei.homelab.mortenolsen.pro/templates
- nuclei.homelab.mortenolsen.pro/severity
- nuclei.homelab.mortenolsen.pro/schedule
- nuclei.homelab.mortenolsen.pro/timeout
- nuclei.homelab.mortenolsen.pro/scanner-image

## RBAC Updates
- Added Job and Pod permissions for operator
- Created separate scanner service account with minimal permissions

## Documentation
- Updated README, user-guide, api.md, and Helm chart README
- Added example annotated Ingress resources
This commit is contained in:
Morten Olsen
2025-12-12 20:51:23 +01:00
parent 519ed32de3
commit 12d681ada1
22 changed files with 3060 additions and 245 deletions

View File

@@ -17,6 +17,7 @@ limitations under the License.
package v1alpha1
import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
)
@@ -44,6 +45,50 @@ type SourceReference struct {
UID string `json:"uid"`
}
// ScannerConfig defines scanner-specific configuration.
// Every field is optional and is omitted from the JSON encoding when empty.
type ScannerConfig struct {
// Image overrides the default scanner image.
// +optional
Image string `json:"image,omitempty"`
// Resources defines the resource requirements for the scanner pod.
// +optional
Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
// Timeout overrides the default scan timeout.
// +optional
Timeout *metav1.Duration `json:"timeout,omitempty"`
// TemplateURLs specifies additional template repositories to clone.
// +optional
TemplateURLs []string `json:"templateURLs,omitempty"`
// NodeSelector is the node selector used for scanner pod scheduling.
// +optional
NodeSelector map[string]string `json:"nodeSelector,omitempty"`
// Tolerations lists the tolerations used for scanner pod scheduling.
// +optional
Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
}
// JobReference contains information about the scanner job.
// Name and UID are always serialized; PodName and StartTime are optional.
type JobReference struct {
// Name is the name of the Job.
Name string `json:"name"`
// UID is the UID of the Job.
UID string `json:"uid"`
// PodName is the name of the scanner pod (for log retrieval).
// +optional
PodName string `json:"podName,omitempty"`
// StartTime is when the job was created.
// +optional
StartTime *metav1.Time `json:"startTime,omitempty"`
}
// NucleiScanSpec defines the desired state of NucleiScan
type NucleiScanSpec struct {
// SourceRef references the Ingress or VirtualService being scanned
@@ -73,6 +118,10 @@ type NucleiScanSpec struct {
// Suspend prevents scheduled scans from running
// +optional
Suspend bool `json:"suspend,omitempty"`
// ScannerConfig allows overriding scanner settings for this scan
// +optional
ScannerConfig *ScannerConfig `json:"scannerConfig,omitempty"`
}
// ScanPhase represents the current phase of the scan
@@ -200,6 +249,14 @@ type NucleiScanStatus struct {
// LastRetryTime is when the last availability check retry occurred
// +optional
LastRetryTime *metav1.Time `json:"lastRetryTime,omitempty"`
// JobRef references the current or last scanner job
// +optional
JobRef *JobReference `json:"jobRef,omitempty"`
// ScanStartTime is when the scanner pod actually started scanning
// +optional
ScanStartTime *metav1.Time `json:"scanStartTime,omitempty"`
}
// +kubebuilder:object:root=true

View File

@@ -21,7 +21,8 @@ limitations under the License.
package v1alpha1
import (
"k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
)
@@ -61,6 +62,25 @@ func (in *Finding) DeepCopy() *Finding {
return out
}
// DeepCopyInto writes a deep copy of the receiver into out.
// Both in and out must be non-nil.
func (in *JobReference) DeepCopyInto(out *JobReference) {
	// The plain assignment copies every value field; StartTime is the only
	// pointer field, so it is the only one that needs its own copy.
	*out = *in
	if in.StartTime == nil {
		return
	}
	out.StartTime = in.StartTime.DeepCopy()
}
// DeepCopy returns a newly allocated deep copy of the receiver.
// A nil receiver yields nil.
func (in *JobReference) DeepCopy() *JobReference {
	if in == nil {
		return nil
	}
	var clone JobReference
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *NucleiScan) DeepCopyInto(out *NucleiScan) {
*out = *in
@@ -139,6 +159,11 @@ func (in *NucleiScanSpec) DeepCopyInto(out *NucleiScanSpec) {
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.ScannerConfig != nil {
in, out := &in.ScannerConfig, &out.ScannerConfig
*out = new(ScannerConfig)
(*in).DeepCopyInto(*out)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NucleiScanSpec.
@@ -156,7 +181,7 @@ func (in *NucleiScanStatus) DeepCopyInto(out *NucleiScanStatus) {
*out = *in
if in.Conditions != nil {
in, out := &in.Conditions, &out.Conditions
*out = make([]v1.Condition, len(*in))
*out = make([]metav1.Condition, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
@@ -189,6 +214,15 @@ func (in *NucleiScanStatus) DeepCopyInto(out *NucleiScanStatus) {
in, out := &in.LastRetryTime, &out.LastRetryTime
*out = (*in).DeepCopy()
}
if in.JobRef != nil {
in, out := &in.JobRef, &out.JobRef
*out = new(JobReference)
(*in).DeepCopyInto(*out)
}
if in.ScanStartTime != nil {
in, out := &in.ScanStartTime, &out.ScanStartTime
*out = (*in).DeepCopy()
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NucleiScanStatus.
@@ -223,6 +257,50 @@ func (in *ScanSummary) DeepCopy() *ScanSummary {
return out
}
// DeepCopyInto writes a deep copy of the receiver into out.
// Both in and out must be non-nil.
func (in *ScannerConfig) DeepCopyInto(out *ScannerConfig) {
	// Start from a shallow copy, then replace each pointer, slice, and map
	// field with freshly allocated storage. Nil fields stay nil.
	*out = *in

	if in.Resources != nil {
		out.Resources = new(v1.ResourceRequirements)
		in.Resources.DeepCopyInto(out.Resources)
	}

	if in.Timeout != nil {
		out.Timeout = new(metav1.Duration)
		*out.Timeout = *in.Timeout
	}

	if in.TemplateURLs != nil {
		out.TemplateURLs = make([]string, len(in.TemplateURLs))
		copy(out.TemplateURLs, in.TemplateURLs)
	}

	if in.NodeSelector != nil {
		out.NodeSelector = make(map[string]string, len(in.NodeSelector))
		for label, value := range in.NodeSelector {
			out.NodeSelector[label] = value
		}
	}

	if in.Tolerations != nil {
		out.Tolerations = make([]v1.Toleration, len(in.Tolerations))
		for idx := range in.Tolerations {
			in.Tolerations[idx].DeepCopyInto(&out.Tolerations[idx])
		}
	}
}
// DeepCopy returns a newly allocated deep copy of the receiver.
// A nil receiver yields nil.
func (in *ScannerConfig) DeepCopy() *ScannerConfig {
	if in == nil {
		return nil
	}
	var clone ScannerConfig
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SourceReference) DeepCopyInto(out *SourceReference) {
*out = *in