feat: implement pod-based scanning architecture

This major refactor moves from synchronous subprocess-based scanning to
asynchronous pod-based scanning using Kubernetes Jobs.

## Architecture Changes
- Scanner jobs are now Kubernetes Jobs with TTLAfterFinished for automatic cleanup
- Jobs have owner references for garbage collection when NucleiScan is deleted
- Configurable concurrency limits, timeouts, and resource requirements

## New Features
- Dual-mode binary: --mode=controller (default) or --mode=scanner
- Annotation-based configuration for Ingress/VirtualService resources
- Operator-level configuration via environment variables
- Startup recovery for orphaned scans after operator restart
- Periodic cleanup of stuck jobs

## New Files
- DESIGN.md: Comprehensive architecture design document
- internal/jobmanager/: Job Manager for creating/monitoring scanner jobs
- internal/scanner/runner.go: Scanner mode implementation
- internal/annotations/: Annotation parsing utilities
- charts/nuclei-operator/templates/scanner-rbac.yaml: Scanner RBAC

## API Changes
- Added ScannerConfig struct for per-scan scanner configuration
- Added JobReference struct for tracking scanner jobs
- Added ScannerConfig field to NucleiScanSpec
- Added JobRef and ScanStartTime fields to NucleiScanStatus

## Supported Annotations
- nuclei.homelab.mortenolsen.pro/enabled
- nuclei.homelab.mortenolsen.pro/templates
- nuclei.homelab.mortenolsen.pro/severity
- nuclei.homelab.mortenolsen.pro/schedule
- nuclei.homelab.mortenolsen.pro/timeout
- nuclei.homelab.mortenolsen.pro/scanner-image

## RBAC Updates
- Added Job and Pod permissions for operator
- Created separate scanner service account with minimal permissions

## Documentation
- Updated README, user-guide, api.md, and Helm chart README
- Added example annotated Ingress resources
This commit is contained in:
Morten Olsen
2025-12-12 20:51:23 +01:00
parent 519ed32de3
commit 335689da22
22 changed files with 3060 additions and 245 deletions

View File

@@ -21,7 +21,8 @@ limitations under the License.
package v1alpha1
import (
"k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
)
@@ -61,6 +62,25 @@ func (in *Finding) DeepCopy() *Finding {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *JobReference) DeepCopyInto(out *JobReference) {
*out = *in
if in.StartTime != nil {
in, out := &in.StartTime, &out.StartTime
*out = (*in).DeepCopy()
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new JobReference.
func (in *JobReference) DeepCopy() *JobReference {
if in == nil {
return nil
}
out := new(JobReference)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *NucleiScan) DeepCopyInto(out *NucleiScan) {
*out = *in
@@ -139,6 +159,11 @@ func (in *NucleiScanSpec) DeepCopyInto(out *NucleiScanSpec) {
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.ScannerConfig != nil {
in, out := &in.ScannerConfig, &out.ScannerConfig
*out = new(ScannerConfig)
(*in).DeepCopyInto(*out)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NucleiScanSpec.
@@ -156,7 +181,7 @@ func (in *NucleiScanStatus) DeepCopyInto(out *NucleiScanStatus) {
*out = *in
if in.Conditions != nil {
in, out := &in.Conditions, &out.Conditions
*out = make([]v1.Condition, len(*in))
*out = make([]metav1.Condition, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
@@ -189,6 +214,15 @@ func (in *NucleiScanStatus) DeepCopyInto(out *NucleiScanStatus) {
in, out := &in.LastRetryTime, &out.LastRetryTime
*out = (*in).DeepCopy()
}
if in.JobRef != nil {
in, out := &in.JobRef, &out.JobRef
*out = new(JobReference)
(*in).DeepCopyInto(*out)
}
if in.ScanStartTime != nil {
in, out := &in.ScanStartTime, &out.ScanStartTime
*out = (*in).DeepCopy()
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NucleiScanStatus.
@@ -223,6 +257,50 @@ func (in *ScanSummary) DeepCopy() *ScanSummary {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ScannerConfig) DeepCopyInto(out *ScannerConfig) {
*out = *in
if in.Resources != nil {
in, out := &in.Resources, &out.Resources
*out = new(v1.ResourceRequirements)
(*in).DeepCopyInto(*out)
}
if in.Timeout != nil {
in, out := &in.Timeout, &out.Timeout
*out = new(metav1.Duration)
**out = **in
}
if in.TemplateURLs != nil {
in, out := &in.TemplateURLs, &out.TemplateURLs
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.NodeSelector != nil {
in, out := &in.NodeSelector, &out.NodeSelector
*out = make(map[string]string, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
if in.Tolerations != nil {
in, out := &in.Tolerations, &out.Tolerations
*out = make([]v1.Toleration, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ScannerConfig.
func (in *ScannerConfig) DeepCopy() *ScannerConfig {
if in == nil {
return nil
}
out := new(ScannerConfig)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SourceReference) DeepCopyInto(out *SourceReference) {
*out = *in