fix: run job in operator namespace to avoid permission issues

This commit is contained in:
Morten Olsen
2025-12-12 21:49:15 +01:00
parent 0340d01e20
commit 1677d02aa7
8 changed files with 571 additions and 316 deletions

View File

@@ -77,6 +77,9 @@ type JobReference struct {
// Name of the Job
Name string `json:"name"`
// Namespace of the Job (may differ from NucleiScan namespace)
Namespace string `json:"namespace"`
// UID of the Job
UID string `json:"uid"`

View File

@@ -5,8 +5,6 @@ metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.19.0
name: nucleiscans.nuclei.homelab.mortenolsen.pro
labels:
{{- include "nuclei-operator.labels" . | nindent 4 }}
spec:
group: nuclei.homelab.mortenolsen.pro
names:
@@ -57,6 +55,127 @@ spec:
spec:
description: NucleiScanSpec defines the desired state of NucleiScan
properties:
scannerConfig:
description: ScannerConfig allows overriding scanner settings for
this scan
properties:
image:
description: Image overrides the default scanner image
type: string
nodeSelector:
additionalProperties:
type: string
description: NodeSelector for scanner pod scheduling
type: object
resources:
description: Resources defines resource requirements for the scanner
pod
properties:
claims:
description: |-
Claims lists the names of resources, defined in spec.resourceClaims,
that are used by this container.
This field depends on the
DynamicResourceAllocation feature gate.
This field is immutable. It can only be set for containers.
items:
description: ResourceClaim references one entry in PodSpec.ResourceClaims.
properties:
name:
description: |-
Name must match the name of one entry in pod.spec.resourceClaims of
the Pod where this field is used. It makes that resource available
inside a container.
type: string
request:
description: |-
Request is the name chosen for a request in the referenced claim.
If empty, everything from the claim is made available, otherwise
only the result of this request.
type: string
required:
- name
type: object
type: array
x-kubernetes-list-map-keys:
- name
x-kubernetes-list-type: map
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: |-
Limits describes the maximum amount of compute resources allowed.
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: |-
Requests describes the minimum amount of compute resources required.
If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
otherwise to an implementation-defined value. Requests cannot exceed Limits.
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
type: object
type: object
templateURLs:
description: TemplateURLs specifies additional template repositories
to clone
items:
type: string
type: array
timeout:
description: Timeout overrides the default scan timeout
type: string
tolerations:
description: Tolerations for scanner pod scheduling
items:
description: |-
The pod this Toleration is attached to tolerates any taint that matches
the triple <key,value,effect> using the matching operator <operator>.
properties:
effect:
description: |-
Effect indicates the taint effect to match. Empty means match all taint effects.
When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
type: string
key:
description: |-
Key is the taint key that the toleration applies to. Empty means match all taint keys.
If the key is empty, operator must be Exists; this combination means to match all values and all keys.
type: string
operator:
description: |-
Operator represents a key's relationship to the value.
Valid operators are Exists and Equal. Defaults to Equal.
Exists is equivalent to wildcard for value, so that a pod can
tolerate all taints of a particular category.
type: string
tolerationSeconds:
description: |-
TolerationSeconds represents the period of time the toleration (which must be
of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
it is not set, which means tolerate the taint forever (do not evict). Zero and
negative values will be treated as 0 (evict immediately) by the system.
format: int64
type: integer
value:
description: |-
Value is the taint value the toleration matches to.
If the operator is Exists, the value should be empty, otherwise just a regular string.
type: string
type: object
type: array
type: object
schedule:
description: |-
Schedule for periodic rescanning in cron format
@@ -64,12 +183,17 @@ spec:
type: string
severity:
  description: Severity filters scan results by severity level
  items:
    # The allowed values apply to each list element. An enum declared on the
    # array itself is compared against the whole list, so no list of
    # severities could ever match it.
    enum:
    - info
    - low
    - medium
    - high
    - critical
    type: string
  type: array
sourceRef:
description:
SourceRef references the Ingress or VirtualService being
description: SourceRef references the Ingress or VirtualService being
scanned
properties:
apiVersion:
@@ -101,8 +225,7 @@ spec:
description: Suspend prevents scheduled scans from running
type: boolean
targets:
description:
Targets is the list of URLs to scan, extracted from the
description: Targets is the list of URLs to scan, extracted from the
source resource
items:
type: string
@@ -129,8 +252,7 @@ spec:
conditions:
description: Conditions represent the latest available observations
items:
description:
Condition contains details for one aspect of the current
description: Condition contains details for one aspect of the current
state of this API Resource.
properties:
lastTransitionTime:
@@ -198,8 +320,7 @@ spec:
description: Description provides details about the finding
type: string
extractedResults:
description:
ExtractedResults contains any data extracted by
description: ExtractedResults contains any data extracted by
the template
items:
type: string
@@ -208,8 +329,7 @@ spec:
description: Host that was scanned
type: string
matchedAt:
description:
MatchedAt is the specific URL or endpoint where
description: MatchedAt is the specific URL or endpoint where
the issue was found
type: string
metadata:
@@ -217,8 +337,7 @@ spec:
type: object
x-kubernetes-preserve-unknown-fields: true
reference:
description:
Reference contains URLs to additional information
description: Reference contains URLs to additional information
about the finding
items:
type: string
@@ -251,12 +370,36 @@ spec:
- timestamp
type: object
type: array
jobRef:
description: JobRef references the current or last scanner job
properties:
name:
description: Name of the Job
type: string
namespace:
description: Namespace of the Job (may differ from NucleiScan
namespace)
type: string
podName:
description: PodName is the name of the scanner pod (for log retrieval)
type: string
startTime:
description: StartTime when the job was created
format: date-time
type: string
uid:
description: UID of the Job
type: string
required:
- name
- namespace
- uid
type: object
lastError:
description: LastError contains the error message if the scan failed
type: string
lastRetryTime:
description:
LastRetryTime is when the last availability check retry
description: LastRetryTime is when the last availability check retry
occurred
format: date-time
type: string
@@ -265,14 +408,12 @@ spec:
format: date-time
type: string
nextScheduledTime:
description:
NextScheduledTime is when the next scheduled scan will
description: NextScheduledTime is when the next scheduled scan will
run
format: date-time
type: string
observedGeneration:
description:
ObservedGeneration is the generation observed by the
description: ObservedGeneration is the generation observed by the
controller
format: int64
type: integer
@@ -289,6 +430,11 @@ spec:
RetryCount tracks the number of consecutive availability check retries
Used for exponential backoff when waiting for targets
type: integer
scanStartTime:
description: ScanStartTime is when the scanner pod actually started
scanning
format: date-time
type: string
summary:
description: Summary provides aggregated scan statistics
properties:
@@ -299,13 +445,11 @@ spec:
findingsBySeverity:
additionalProperties:
type: integer
description:
FindingsBySeverity breaks down findings by severity
description: FindingsBySeverity breaks down findings by severity
level
type: object
targetsScanned:
description:
TargetsScanned is the number of targets that were
description: TargetsScanned is the number of targets that were
scanned
type: integer
totalFindings:

View File

@@ -70,6 +70,8 @@ spec:
value: {{ .Values.scanner.ttlAfterFinished | quote }}
- name: SCANNER_SERVICE_ACCOUNT
value: {{ include "nuclei-operator.fullname" . }}-scanner
- name: OPERATOR_NAMESPACE
value: {{ .Release.Namespace | quote }}
{{- if .Values.scanner.defaultTemplates }}
- name: DEFAULT_TEMPLATES
value: {{ join "," .Values.scanner.defaultTemplates | quote }}

View File

@@ -241,6 +241,16 @@ func main() {
scannerServiceAccount = "nuclei-scanner"
}
// Resolve the namespace the operator runs in. Precedence:
//  1. the OPERATOR_NAMESPACE environment variable,
//  2. the namespace file mounted alongside the pod's service account,
//  3. the hard-coded default "nuclei-operator-system".
operatorNamespace := strings.TrimSpace(os.Getenv("OPERATOR_NAMESPACE"))
if operatorNamespace == "" {
	// Trim the file contents: surrounding whitespace or a trailing newline
	// would otherwise end up inside the namespace name.
	if data, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/namespace"); err == nil {
		operatorNamespace = strings.TrimSpace(string(data))
	}
}
if operatorNamespace == "" {
	// Nothing usable was found (file missing, unreadable, or blank).
	operatorNamespace = "nuclei-operator-system"
}
defaultTemplates := []string{}
if v := os.Getenv("DEFAULT_TEMPLATES"); v != "" {
defaultTemplates = strings.Split(v, ",")
@@ -259,6 +269,7 @@ func main() {
BackoffLimit: 2,
MaxConcurrent: maxConcurrentScans,
ServiceAccountName: scannerServiceAccount,
OperatorNamespace: operatorNamespace,
DefaultResources: jobmanager.DefaultConfig().DefaultResources,
DefaultTemplates: defaultTemplates,
DefaultSeverity: defaultSeverity,

View File

@@ -376,6 +376,10 @@ spec:
name:
description: Name of the Job
type: string
namespace:
description: Namespace of the Job (may differ from NucleiScan
namespace)
type: string
podName:
description: PodName is the name of the scanner pod (for log retrieval)
type: string
@@ -388,6 +392,7 @@ spec:
type: string
required:
- name
- namespace
- uid
type: object
lastError:

View File

@@ -257,8 +257,13 @@ func (r *NucleiScanReconciler) handleDeletion(ctx context.Context, nucleiScan *n
// Clean up any running scanner job
if nucleiScan.Status.JobRef != nil {
log.Info("Deleting scanner job", "job", nucleiScan.Status.JobRef.Name)
if err := r.JobManager.DeleteJob(ctx, nucleiScan.Status.JobRef.Name, nucleiScan.Namespace); err != nil {
jobNamespace := nucleiScan.Status.JobRef.Namespace
if jobNamespace == "" {
// Fallback for backwards compatibility
jobNamespace = nucleiScan.Namespace
}
log.Info("Deleting scanner job", "job", nucleiScan.Status.JobRef.Name, "namespace", jobNamespace)
if err := r.JobManager.DeleteJob(ctx, nucleiScan.Status.JobRef.Name, jobNamespace); err != nil {
if !apierrors.IsNotFound(err) {
log.Error(err, "Failed to delete scanner job", "job", nucleiScan.Status.JobRef.Name)
}
@@ -324,6 +329,7 @@ func (r *NucleiScanReconciler) handlePendingPhase(ctx context.Context, nucleiSca
nucleiScan.Status.Phase = nucleiv1alpha1.ScanPhaseRunning
nucleiScan.Status.JobRef = &nucleiv1alpha1.JobReference{
Name: job.Name,
Namespace: job.Namespace,
UID: string(job.UID),
StartTime: &now,
}
@@ -401,8 +407,13 @@ func (r *NucleiScanReconciler) handleRunningPhase(ctx context.Context, nucleiSca
return ctrl.Result{Requeue: true}, nil
}
// Get the job
job, err := r.JobManager.GetJob(ctx, nucleiScan.Status.JobRef.Name, nucleiScan.Namespace)
// Get the job - use namespace from JobRef (may be different from scan namespace)
jobNamespace := nucleiScan.Status.JobRef.Namespace
if jobNamespace == "" {
// Fallback for backwards compatibility
jobNamespace = nucleiScan.Namespace
}
job, err := r.JobManager.GetJob(ctx, nucleiScan.Status.JobRef.Name, jobNamespace)
if err != nil {
if apierrors.IsNotFound(err) {
logger.Info("Scanner job not found, resetting to Pending")

View File

@@ -82,6 +82,9 @@ type Config struct {
// ServiceAccountName is the service account to use for scanner pods
ServiceAccountName string
// OperatorNamespace is the namespace where the operator runs and where scanner jobs will be created
OperatorNamespace string
// DefaultResources are the default resource requirements for scanner pods
DefaultResources corev1.ResourceRequirements
@@ -101,6 +104,7 @@ func DefaultConfig() Config {
BackoffLimit: DefaultBackoffLimit,
MaxConcurrent: 5,
ServiceAccountName: "nuclei-scanner",
OperatorNamespace: "nuclei-operator-system",
DefaultResources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("100m"),
@@ -136,14 +140,22 @@ func (m *JobManager) CreateScanJob(ctx context.Context, scan *nucleiv1alpha1.Nuc
job := m.buildJob(scan)
// Set owner reference so the job is garbage collected when the scan is deleted
// Only set owner reference if the job is in the same namespace as the scan
// Cross-namespace owner references are not allowed in Kubernetes
if job.Namespace == scan.Namespace {
if err := controllerutil.SetControllerReference(scan, job, m.Scheme); err != nil {
return nil, fmt.Errorf("failed to set controller reference: %w", err)
}
}
// When job is in a different namespace (operator namespace), we rely on:
// 1. TTLSecondsAfterFinished for automatic cleanup of completed jobs
// 2. Labels (LabelScanName, LabelScanNamespace) to track which scan the job belongs to
// 3. CleanupOrphanedJobs to clean up jobs whose scans no longer exist
logger.Info("Creating scanner job",
"job", job.Name,
"namespace", job.Namespace,
"jobNamespace", job.Namespace,
"scanNamespace", scan.Namespace,
"image", job.Spec.Template.Spec.Containers[0].Image,
"targets", len(scan.Spec.Targets))
@@ -277,15 +289,42 @@ func (m *JobManager) CleanupOrphanedJobs(ctx context.Context) error {
}
for _, job := range jobList.Items {
// Check if owner reference exists and the owner still exists
ownerRef := metav1.GetControllerOf(&job)
if ownerRef == nil {
logger.Info("Deleting orphaned job without owner", "job", job.Name, "namespace", job.Namespace)
// Check if the associated NucleiScan still exists using labels
scanName := job.Labels[LabelScanName]
scanNamespace := job.Labels[LabelScanNamespace]
if scanName != "" && scanNamespace != "" {
// Try to get the associated NucleiScan
scan := &nucleiv1alpha1.NucleiScan{}
err := m.Get(ctx, types.NamespacedName{Name: scanName, Namespace: scanNamespace}, scan)
if err != nil {
if apierrors.IsNotFound(err) {
// The scan no longer exists - delete the job
logger.Info("Deleting orphaned job (scan not found)",
"job", job.Name,
"namespace", job.Namespace,
"scanName", scanName,
"scanNamespace", scanNamespace)
if err := m.DeleteJob(ctx, job.Name, job.Namespace); err != nil && !apierrors.IsNotFound(err) {
logger.Error(err, "Failed to delete orphaned job", "job", job.Name)
}
continue
}
// Other error - log and continue
logger.Error(err, "Failed to check if scan exists", "scanName", scanName, "scanNamespace", scanNamespace)
continue
}
} else {
// Job doesn't have proper labels - check owner reference as fallback
ownerRef := metav1.GetControllerOf(&job)
if ownerRef == nil {
logger.Info("Deleting orphaned job without owner or labels", "job", job.Name, "namespace", job.Namespace)
if err := m.DeleteJob(ctx, job.Name, job.Namespace); err != nil && !apierrors.IsNotFound(err) {
logger.Error(err, "Failed to delete orphaned job", "job", job.Name)
}
continue
}
}
// Check if the job is stuck (running longer than 2x the timeout)
if job.Status.StartTime != nil {
@@ -305,12 +344,18 @@ func (m *JobManager) CleanupOrphanedJobs(ctx context.Context) error {
// buildJob creates a Job specification for the given NucleiScan
func (m *JobManager) buildJob(scan *nucleiv1alpha1.NucleiScan) *batchv1.Job {
// Generate a unique job name
jobName := fmt.Sprintf("nucleiscan-%s-%d", scan.Name, time.Now().Unix())
// Generate a unique job name that includes the scan namespace to avoid collisions
jobName := fmt.Sprintf("nucleiscan-%s-%s-%d", scan.Namespace, scan.Name, time.Now().Unix())
if len(jobName) > 63 {
jobName = jobName[:63]
}
// Determine the namespace for the job - use operator namespace if configured
jobNamespace := m.Config.OperatorNamespace
if jobNamespace == "" {
jobNamespace = scan.Namespace
}
// Determine the scanner image
image := m.Config.ScannerImage
if scan.Spec.ScannerConfig != nil && scan.Spec.ScannerConfig.Image != "" {
@@ -360,7 +405,7 @@ func (m *JobManager) buildJob(scan *nucleiv1alpha1.NucleiScan) *batchv1.Job {
job := &batchv1.Job{
ObjectMeta: metav1.ObjectMeta{
Name: jobName,
Namespace: scan.Namespace,
Namespace: jobNamespace,
Labels: labels,
},
Spec: batchv1.JobSpec{

View File

@@ -43,14 +43,22 @@ func TestBuildJob(t *testing.T) {
job := manager.buildJob(scan)
// Verify job name prefix
// Verify job name prefix - should include scan namespace to avoid collisions
if len(job.Name) == 0 {
t.Error("Job name should not be empty")
}
// Verify namespace
if job.Namespace != "default" {
t.Errorf("Expected namespace 'default', got '%s'", job.Namespace)
// Verify namespace - job should be created in operator namespace
if job.Namespace != config.OperatorNamespace {
t.Errorf("Expected namespace '%s', got '%s'", config.OperatorNamespace, job.Namespace)
}
// Verify scan labels are set correctly for cross-namespace tracking
if job.Labels[LabelScanName] != scan.Name {
t.Errorf("Expected scan name label '%s', got '%s'", scan.Name, job.Labels[LabelScanName])
}
if job.Labels[LabelScanNamespace] != scan.Namespace {
t.Errorf("Expected scan namespace label '%s', got '%s'", scan.Namespace, job.Labels[LabelScanNamespace])
}
// Verify labels
@@ -115,3 +123,29 @@ func TestBuildJobWithCustomConfig(t *testing.T) {
t.Errorf("Expected deadline %d, got %d", expectedDeadline, *job.Spec.ActiveDeadlineSeconds)
}
}
// TestBuildJobInSameNamespace checks that buildJob places the job in the
// scan's own namespace when no operator namespace is configured.
func TestBuildJobInSameNamespace(t *testing.T) {
	// An empty OperatorNamespace selects the same-namespace behavior.
	cfg := DefaultConfig()
	cfg.OperatorNamespace = ""
	jm := &JobManager{Config: cfg}

	target := &nucleiv1alpha1.NucleiScan{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: "my-namespace",
			Name:      "test-scan",
		},
		Spec: nucleiv1alpha1.NucleiScanSpec{
			Targets: []string{"https://example.com"},
		},
	}

	built := jm.buildJob(target)

	// With no operator namespace, the job must live next to its scan.
	if want, got := target.Namespace, built.Namespace; got != want {
		t.Errorf("Expected namespace '%s', got '%s'", want, got)
	}
}