Compare commits


5 Commits

Author        SHA1        Message            Date
Morten Olsen  9fe279b1b5  docs               2025-09-05 08:56:04 +02:00
Morten Olsen  63e0ef0909  add-coder          2025-09-05 07:04:15 +02:00
Morten Olsen  a44e3cb2be  ssh port on gitea  2025-09-04 20:15:36 +02:00
Morten Olsen  8f5e148bb2  update             2025-09-04 18:22:33 +02:00
Morten Olsen  21262705a7  fixes              2025-09-03 23:06:59 +02:00
103 changed files with 1479 additions and 10 deletions

.gitignore

@@ -36,3 +36,4 @@ report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
/data/
/cloudflare.yaml
/secret.*.yaml


@@ -6,5 +6,5 @@ spec:
   environment: '{{ .Values.globals.environment }}'
   redirectUris:
     - path: /api/auth/oidc/callback
-  subdomain: bytestash
+  subdomain: '{{ .Values.subdomain }}'
   matchingMode: strict


@@ -0,0 +1,10 @@
apiVersion: homelab.mortenolsen.pro/v1
kind: OidcClient
metadata:
  name: '{{ .Release.Name }}'
spec:
  environment: '{{ .Values.globals.environment }}'
  redirectUris:
    - path: /api/v2/users/oidc/callback
  subdomain: '{{ .Values.subdomain }}'
  matchingMode: strict


@@ -0,0 +1,73 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: '{{ .Release.Name }}'
spec:
  strategy:
    type: Recreate
  replicas: 1
  selector:
    matchLabels:
      app: '{{ .Release.Name }}'
  template:
    metadata:
      labels:
        app: '{{ .Release.Name }}'
    spec:
      serviceAccountName: '{{ .Release.Name }}-serviceaccount'
      containers:
        - name: '{{ .Release.Name }}'
          image: '{{ .Values.image.repository }}:{{ .Values.image.tag }}'
          imagePullPolicy: '{{ .Values.image.pullPolicy }}'
          ports:
            - name: http
              containerPort: 7080
              protocol: TCP
          livenessProbe:
            tcpSocket:
              port: http
          readinessProbe:
            tcpSocket:
              port: http
          volumeMounts:
            - mountPath: /home/coder/.config
              name: data
          env:
            - name: CODER_HTTP_ADDRESS
              value: '0.0.0.0:7080'
            - name: CODER_OIDC_ALLOWED_GROUPS
              value: admin
            - name: CODER_OIDC_GROUP_FIELD
              value: groups
            - name: CODER_ACCESS_URL
              value: https://coder.olsen.cloud
            - name: CODER_OIDC_ICON_URL
              value: https://authentik.olsen.cloud/static/dist/assets/icons/icon.png
            - name: CODER_DISABLE_PASSWORD_AUTH
              value: 'true'
            - name: CODER_OAUTH2_GITHUB_ALLOW_SIGNUPS
              value: 'false'
            - name: CODER_OIDC_SIGN_IN_TEXT
              value: 'Sign in with OIDC'
            - name: CODER_OIDC_SCOPES
              value: openid,profile,email,offline_access
            - name: CODER_OIDC_ISSUER_URL
              valueFrom:
                secretKeyRef:
                  name: '{{ .Release.Name }}-client'
                  key: configurationIssuer
            - name: CODER_OIDC_CLIENT_ID
              valueFrom:
                secretKeyRef:
                  name: '{{ .Release.Name }}-client'
                  key: clientId
            - name: CODER_OIDC_CLIENT_SECRET
              valueFrom:
                secretKeyRef:
                  name: '{{ .Release.Name }}-client'
                  key: clientSecret
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: '{{ .Release.Name }}-data'


@@ -0,0 +1,11 @@
apiVersion: homelab.mortenolsen.pro/v1
kind: HttpService
metadata:
  name: '{{ .Release.Name }}'
spec:
  environment: '{{ .Values.globals.environment }}'
  subdomain: '{{ .Values.subdomain }}'
  destination:
    host: '{{ .Release.Name }}.{{ .Release.Namespace }}.svc.cluster.local'
    port:
      number: 80


@@ -0,0 +1,21 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: '{{ .Release.Name }}-workspace-creator'
rules:
  - apiGroups: [''] # "" indicates the core API group (for Pods, PVCs, Services)
    resources: ['pods', 'pods/exec', 'pods/log', 'persistentvolumeclaims', 'services']
    verbs: ['get', 'list', 'watch', 'create', 'update', 'patch', 'delete']
  - apiGroups: ['apps'] # For Deployments, StatefulSets
    resources: ['deployments', 'statefulsets']
    verbs: ['get', 'list', 'watch', 'create', 'update', 'patch', 'delete']
  - apiGroups: ['networking.k8s.io'] # For Ingresses
    resources: ['ingresses']
    verbs: ['get', 'list', 'watch', 'create', 'update', 'patch', 'delete']
  - apiGroups: ['events.k8s.io'] # For events related to workspace activity
    resources: ['events']
    verbs: ['create', 'patch', 'update'] # Coder might create events for workspace lifecycle
# Add any other resources that Coder workspace templates might create (e.g., secrets, configmaps)
# - apiGroups: [""]
#   resources: ["secrets", "configmaps"]
#   verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]


@@ -0,0 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  # ClusterRoleBindings are cluster-scoped, so no metadata.namespace here
  name: '{{ .Release.Name }}-workspace-creator-binding'
subjects:
  - kind: ServiceAccount
    name: '{{ .Release.Name }}-serviceaccount'
    namespace: '{{ .Release.Namespace }}'
roleRef:
  kind: ClusterRole
  name: '{{ .Release.Name }}-workspace-creator'
  apiGroup: rbac.authorization.k8s.io


@@ -0,0 +1,15 @@
apiVersion: v1
kind: Service
metadata:
  name: '{{ .Release.Name }}'
  labels:
    app: '{{ .Release.Name }}'
spec:
  type: ClusterIP
  ports:
    - port: 80
      targetPort: 7080
      protocol: TCP
      name: http
  selector:
    app: '{{ .Release.Name }}'


@@ -0,0 +1,5 @@
apiVersion: v1
kind: ServiceAccount
metadata:
  name: '{{ .Release.Name }}-serviceaccount'
  namespace: '{{ .Release.Namespace }}'


@@ -0,0 +1,7 @@
globals:
  environment: prod
image:
  repository: ghcr.io/coder/coder
  tag: latest
  pullPolicy: IfNotPresent
subdomain: coder


@@ -0,0 +1,3 @@
apiVersion: v2
version: 1.0.0
name: gitea


@@ -0,0 +1,36 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: '{{ .Release.Name }}-runner'
  labels:
    app: '{{ .Release.Name }}-runner'
spec:
  replicas: 1
  selector:
    matchLabels:
      app: '{{ .Release.Name }}-runner'
  template:
    metadata:
      labels:
        app: '{{ .Release.Name }}-runner'
    spec:
      containers:
        - name: '{{ .Release.Name }}-runner'
          image: docker.io/gitea/act_runner:latest-dind-rootless
          env:
            - name: GITEA_INSTANCE_URL
              value: '{{ .Release.Name }}'
            - name: GITEA_RUNNER_NAME
            - name: GITEA_RUNNER_REGISTRATION_TOKEN
              valueFrom:
                secretKeyRef:
                  name: '{{ .Release.Name }}-runner'
                  key: registration_token
            - name: DOCKER_HOST
              value: tcp://localhost:2376
            - name: DOCKER_CERT_PATH
              value: /certs/client
            - name: DOCKER_TLS_VERIFY
              value: '1'
          securityContext:
            privileged: true


@@ -0,0 +1,10 @@
apiVersion: homelab.mortenolsen.pro/v1
kind: OidcClient
metadata:
  name: '{{ .Release.Name }}'
spec:
  environment: '{{ .Values.globals.environment }}'
  redirectUris:
    - path: /user/oauth2/Authentik/callback
  subdomain: '{{ .Values.subdomain }}'
  matchingMode: strict


@@ -0,0 +1,99 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: '{{ .Release.Name }}'
spec:
  strategy:
    type: Recreate
  replicas: 1
  selector:
    matchLabels:
      app: '{{ .Release.Name }}'
  template:
    metadata:
      labels:
        app: '{{ .Release.Name }}'
    spec:
      containers:
        - name: '{{ .Release.Name }}'
          image: '{{ .Values.image.repository }}:{{ .Values.image.tag }}'
          imagePullPolicy: '{{ .Values.image.pullPolicy }}'
          ports:
            - name: http
              containerPort: 3000
              protocol: TCP
            - name: ssh
              containerPort: 22
              protocol: TCP
          livenessProbe:
            tcpSocket:
              port: http
          readinessProbe:
            tcpSocket:
              port: http
          volumeMounts:
            - mountPath: /data
              name: data
          env:
            - name: TZ
              value: '{{ .Values.globals.timezone }}'
            - name: USER_UID
              value: '1000'
            - name: USER_GID
              value: '1000'
            - name: GITEA__service__REQUIRE_EXTERNAL_REGISTRATION_PASSWORD
              value: 'true'
            #- name: GITEA__service__ENABLE_BASIC_AUTHENTICATION
            #  value: 'true'
            - name: GITEA__service__ENABLE_PASSWORD_SIGNIN_FORM
              value: 'false'
            - name: GITEA__service__DEFAULT_KEEP_EMAIL_PRIVATE
              value: 'true'
            - name: GITEA__service__DEFAULT_USER_IS_RESTRICTED
              value: 'true'
            - name: GITEA__service__DEFAULT_USER_VISIBILITY
              value: 'private'
            - name: GITEA__service__DEFAULT_ORG_VISIBILITY
              value: 'private'
            - name: GITEA__service__ALLOW_ONLY_EXTERNAL_REGISTRATION
              value: 'true'
            - name: GITEA__other__SHOW_FOOTER_POWERED_BY
              value: 'false'
            - name: GITEA__other__SHOW_FOOTER_TEMPLATE_LOAD_TIME
              value: 'false'
            - name: GITEA__other__SHOW_FOOTER_VERSION
              value: 'false'
            - name: GITEA__repository__ENABLE_PUSH_CREATE_USER
              value: 'true'
            - name: GITEA__repository__ENABLE_PUSH_CREATE_ORG
              value: 'true'
            - name: GITEA__openid__ENABLE_OPENID_SIGNIN
              value: 'false'
            - name: GITEA__openid__ENABLE_OPENID_SIGNUP
              value: 'false'
            - name: GITEA__database__DB_TYPE
              value: postgres
            - name: GITEA__database__NAME
              valueFrom:
                secretKeyRef:
                  name: '{{ .Release.Name }}-pg-connection'
                  key: database
            - name: GITEA__database__HOST
              valueFrom:
                secretKeyRef:
                  name: '{{ .Release.Name }}-pg-connection'
                  key: host
            - name: GITEA__database__USER
              valueFrom:
                secretKeyRef:
                  name: '{{ .Release.Name }}-pg-connection'
                  key: user
            - name: GITEA__database__PASSWD
              valueFrom:
                secretKeyRef:
                  name: '{{ .Release.Name }}-pg-connection'
                  key: password
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: '{{ .Release.Name }}-data'


@@ -0,0 +1,32 @@
apiVersion: v1
kind: Service
metadata:
  name: '{{ .Release.Name }}'
  labels:
    app: '{{ .Release.Name }}'
spec:
  type: ClusterIP
  ports:
    - port: 80
      targetPort: 3000
      protocol: TCP
      name: http
  selector:
    app: '{{ .Release.Name }}'
---
apiVersion: v1
kind: Service
metadata:
  name: '{{ .Release.Name }}-ssh'
  labels:
    app: '{{ .Release.Name }}'
spec:
  type: LoadBalancer
  ports:
    - port: 2202
      targetPort: 22
      protocol: TCP
      name: ssh
  selector:
    app: '{{ .Release.Name }}'


@@ -0,0 +1,8 @@
globals:
  environment: prod
  timezone: Europe/Amsterdam
image:
  repository: docker.gitea.com/gitea
  tag: latest
  pullPolicy: IfNotPresent
subdomain: gitea


@@ -0,0 +1,3 @@
apiVersion: v2
version: 1.0.0
name: headscale


@@ -0,0 +1,10 @@
apiVersion: homelab.mortenolsen.pro/v1
kind: OidcClient
metadata:
  name: '{{ .Release.Name }}'
spec:
  environment: '{{ .Values.globals.environment }}'
  redirectUris:
    - path: /oidc/callback
  subdomain: '{{ .Values.subdomain }}'
  matchingMode: strict


@@ -0,0 +1,70 @@
apiVersion: v1
kind: ConfigMap
metadata:
  name: '{{ .Release.Name }}-config-template'
data:
  config.yaml.template: |
    server_url: ${PUBLIC_URL}
    listen_addr: 0.0.0.0:8080
    metrics_listen_addr: 0.0.0.0:9090
    grpc_listen_addr: 0.0.0.0:50443
    private_key_path: /var/lib/headscale/private_key # Path inside the container
    noise:
      private_key_path: /var/lib/headscale/noise_private_key # Path inside the container
    listen_routes: false
    base_domain: "${PUBLIC_URL}" # For client routes and DNS push.
    derp:
      server:
        enabled: false
        region_id: 999
        region_code: "headscale"
        region_name: "Headscale Embedded DERP"
        stun_listen_addr: "0.0.0.0:3478"
        automatically_add_embedded_derp_region: true
      urls:
        - https://controlplane.tailscale.com/derpmap/default
      auto_update_enabled: true
      update_frequency: 24h
    oidc:
      enabled: true
      only_start_if_oidc_is_available: true
      issuer: "${OIDC_ISSUER_URL}"
      client_id: "${OIDC_CLIENT_ID}"
      client_secret: "${OIDC_CLIENT_SECRET}"
      scopes: ["openid", "profile", "email"]
      redirect_url: "${PUBLIC_URL}/oidc/callback"
      pkce:
        enabled: true
        method: S256
    # DNS configuration
    dns:
      magic_dns: false
      override_local_dns: true # Push Headscale's DNS settings to clients
      ttl: 60
      nameservers:
        global:
          - 1.1.1.1 # Cloudflare DNS
          #- 10.43.0.10 # Replace with your ClusterIP for kube-dns/CoreDNS
      # Domains to search for (e.g., for Kubernetes services)
      search_domains:
        - svc.cluster.local
        - cluster.local
    auto_create_users: true
    oidc_user_property: preferred_username # Or 'email' or 'sub'
    prefixes:
      v4: 10.20.20.0/24 # Example: A /24 subnet for your VPN clients
    database:
      type: sqlite
      sqlite:
        path: /var/lib/headscale/db.sqlite


@@ -0,0 +1,97 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: '{{ .Release.Name }}'
  labels:
    app: '{{ .Release.Name }}'
spec:
  replicas: 1
  selector:
    matchLabels:
      app: '{{ .Release.Name }}'
  template:
    metadata:
      labels:
        app: '{{ .Release.Name }}'
    spec:
      # To expose WireGuard UDP directly, we need a NodePort service.
      # The Pod needs to be aware of the external port it's being exposed on.
      # The easiest way to get WireGuard to listen on the correct port and make it
      # externally accessible is to use `hostNetwork: true` for the UDP component,
      # or by directly specifying the listen port in Headscale config if the NodePort is stable.
      # OPTION 1: Best for simple homelab on bare metal where host network traffic isn't an issue
      # hostNetwork: true # This makes the pod listen directly on the node's IPs
      # dnsPolicy: ClusterFirstWithHostNet # Required if using hostNetwork
      initContainers:
        - name: generate-config
          image: alpine/git # A small image with 'envsubst' available or easily installable
          imagePullPolicy: IfNotPresent
          command: ['sh', '-c']
          args:
            - |
              # Install envsubst if it's not present (alpine/git may not have it by default)
              apk update && apk add bash gettext
              # Ensure the destination exists (a mounted emptyDir, so this is a no-op safeguard)
              mkdir -p /etc/headscale
              # Substitute environment variables into the template
              # The vars are passed via `env` section below
              envsubst < /config-template/config.yaml.template > /etc/headscale/config.yaml
              # Optional: Verify the generated config
              echo "--- Generated Headscale Configuration ---"
              cat /etc/headscale/config.yaml
              echo "---------------------------------------"
          env:
            # These are the variables that `envsubst` will look for and replace
            - name: PUBLIC_URL
              value: 'https://{{ .Values.subdomain }}.olsen.cloud'
            - name: OIDC_ISSUER_URL
              valueFrom:
                secretKeyRef:
                  name: '{{ .Release.Name }}-client'
                  key: configurationIssuer
            - name: OIDC_CLIENT_ID
              valueFrom:
                secretKeyRef:
                  name: '{{ .Release.Name }}-client'
                  key: clientId
            - name: OIDC_CLIENT_SECRET
              valueFrom:
                secretKeyRef:
                  name: '{{ .Release.Name }}-client'
                  key: clientSecret
            # Add any other variables used in config.yaml.template here
          volumeMounts:
            - name: config-template
              mountPath: /config-template # Mount the ConfigMap as a volume
              readOnly: true
            - name: headscale-config
              mountPath: /etc/headscale # Destination for the generated config
      containers:
        - name: '{{ .Release.Name }}'
          image: headscale/headscale:latest # Use the official image
          command: ['headscale', 'serve']
          ports:
            - name: http-api
              containerPort: 8080
              protocol: TCP
            - name: wireguard-udp
              containerPort: 41641
              protocol: UDP
          volumeMounts:
            - name: headscale-data
              mountPath: /var/lib/headscale
            - name: headscale-config
              mountPath: /etc/headscale
      volumes:
        - name: config-template
          configMap:
            name: '{{ .Release.Name }}-config-template'
        - name: headscale-config
          emptyDir: {}
        - name: headscale-data
          persistentVolumeClaim:
            claimName: '{{ .Release.Name }}-data'


@@ -0,0 +1,32 @@
apiVersion: v1
kind: Service
metadata:
  name: '{{ .Release.Name }}'
  labels:
    app: '{{ .Release.Name }}'
spec:
  type: ClusterIP
  ports:
    - port: 80
      targetPort: 8080
      protocol: TCP
      name: http
  selector:
    app: '{{ .Release.Name }}'
---
apiVersion: v1
kind: Service
metadata:
  name: '{{ .Release.Name }}-headscale'
  labels:
    app: '{{ .Release.Name }}'
spec:
  type: LoadBalancer
  ports:
    - port: 41641
      targetPort: 41641
      protocol: UDP
      name: wireguard-udp
  selector:
    app: '{{ .Release.Name }}'


@@ -0,0 +1,7 @@
globals:
  environment: prod
image:
  repository: headscale/headscale
  tag: latest
  pullPolicy: IfNotPresent
subdomain: headscale


@@ -0,0 +1,3 @@
apiVersion: v2
version: 1.0.0
name: metamcp


@@ -0,0 +1,6 @@
apiVersion: homelab.mortenolsen.pro/v1
kind: PostgresDatabase
metadata:
  name: '{{ .Release.Name }}'
spec:
  environment: '{{ .Values.globals.environment }}'


@@ -0,0 +1,79 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: '{{ .Release.Name }}'
spec:
  strategy:
    type: Recreate
  replicas: 1
  selector:
    matchLabels:
      app: '{{ .Release.Name }}'
  template:
    metadata:
      labels:
        app: '{{ .Release.Name }}'
    spec:
      containers:
        - name: '{{ .Release.Name }}'
          image: '{{ .Values.image.repository }}:{{ .Values.image.tag }}'
          imagePullPolicy: '{{ .Values.image.pullPolicy }}'
          ports:
            - name: http
              containerPort: 12008
              protocol: TCP
          livenessProbe:
            tcpSocket:
              port: http
          readinessProbe:
            tcpSocket:
              port: http
          volumeMounts:
            - mountPath: /data
              name: data
          env:
            - name: TZ
              value: '{{ .Values.globals.timezone }}'
            - name: APP_URL
              value: https://metamcp.olsen.cloud # TODO: Change
            - name: NEXT_PUBLIC_APP_URL
              value: https://metamcp.olsen.cloud # TODO: Change
            - name: BETTER_AUTH_SECRET
              valueFrom:
                secretKeyRef:
                  name: '{{ .Release.Name }}-secrets'
                  key: betterauth
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: '{{ .Release.Name }}-pg-connection'
                  key: url
            - name: POSTGRES_DB
              valueFrom:
                secretKeyRef:
                  name: '{{ .Release.Name }}-pg-connection'
                  key: database
            - name: POSTGRES_HOST
              valueFrom:
                secretKeyRef:
                  name: '{{ .Release.Name }}-pg-connection'
                  key: host
            - name: POSTGRES_PORT
              valueFrom:
                secretKeyRef:
                  name: '{{ .Release.Name }}-pg-connection'
                  key: port
            - name: POSTGRES_USER
              valueFrom:
                secretKeyRef:
                  name: '{{ .Release.Name }}-pg-connection'
                  key: user
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: '{{ .Release.Name }}-pg-connection'
                  key: password
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: '{{ .Release.Name }}-data'


@@ -0,0 +1,9 @@
apiVersion: homelab.mortenolsen.pro/v1
kind: GenerateSecret
metadata:
  name: '{{ .Release.Name }}-secrets'
spec:
  fields:
    - name: betterauth
      encoding: base64
      length: 64


@@ -0,0 +1,15 @@
apiVersion: v1
kind: Service
metadata:
  name: '{{ .Release.Name }}'
  labels:
    app: '{{ .Release.Name }}'
spec:
  type: ClusterIP
  ports:
    - port: 80
      targetPort: 12008
      protocol: TCP
      name: http
  selector:
    app: '{{ .Release.Name }}'


@@ -0,0 +1,8 @@
globals:
  environment: prod
  timezone: Europe/Amsterdam
image:
  repository: ghcr.io/metatool-ai/metamcp
  tag: latest
  pullPolicy: IfNotPresent
subdomain: metamcp


@@ -0,0 +1,11 @@
apiVersion: homelab.mortenolsen.pro/v1
kind: ExternalHttpService
metadata:
  name: '{{ .Release.Name }}'
spec:
  environment: '{{ .Values.globals.environment }}'
  subdomain: '{{ .Values.subdomain }}'
  destination:
    host: '{{ .Release.Name }}.{{ .Release.Namespace }}.svc.cluster.local'
    port:
      number: 80


@@ -0,0 +1,11 @@
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: '{{ .Release.Name }}-data'
spec:
  accessModes:
    - 'ReadWriteOnce'
  resources:
    requests:
      storage: '1Gi'
  storageClassName: '{{ .Values.globals.environment }}'


@@ -0,0 +1,6 @@
apiVersion: homelab.mortenolsen.pro/v1
kind: PostgresDatabase
metadata:
  name: '{{ .Release.Name }}'
spec:
  environment: '{{ .Values.globals.environment }}'


@@ -0,0 +1,11 @@
apiVersion: homelab.mortenolsen.pro/v1
kind: ExternalHttpService
metadata:
  name: '{{ .Release.Name }}'
spec:
  environment: '{{ .Values.globals.environment }}'
  subdomain: '{{ .Values.subdomain }}'
  destination:
    host: '{{ .Release.Name }}.{{ .Release.Namespace }}.svc.cluster.local'
    port:
      number: 80


@@ -0,0 +1,11 @@
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: '{{ .Release.Name }}-data'
spec:
  accessModes:
    - 'ReadWriteOnce'
  resources:
    requests:
      storage: '1Gi'
  storageClassName: '{{ .Values.globals.environment }}'


@@ -0,0 +1,11 @@
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: '{{ .Release.Name }}-data'
spec:
  accessModes:
    - 'ReadWriteOnce'
  resources:
    requests:
      storage: '1Gi'
  storageClassName: '{{ .Values.globals.environment }}'


@@ -0,0 +1,3 @@
apiVersion: v2
version: 1.0.0
name: openwebui


@@ -0,0 +1,11 @@
apiVersion: homelab.mortenolsen.pro/v1
kind: ExternalHttpService
metadata:
  name: '{{ .Release.Name }}'
spec:
  environment: '{{ .Values.globals.environment }}'
  subdomain: '{{ .Values.subdomain }}'
  destination:
    host: '{{ .Release.Name }}.{{ .Release.Namespace }}.svc.cluster.local'
    port:
      number: 80


@@ -0,0 +1,11 @@
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: '{{ .Release.Name }}-data'
spec:
  accessModes:
    - 'ReadWriteOnce'
  resources:
    requests:
      storage: '1Gi'
  storageClassName: '{{ .Values.globals.environment }}'

docs/monitoring.md

@@ -0,0 +1,476 @@
# Home Kubernetes Cluster Setup: Monitoring & Security Quickstart
This guide provides a practical, lightweight setup for monitoring and security on your home Kubernetes cluster. It uses Helm for easy installation and focuses on essential features with minimal complexity.
## Overview
This setup includes:
* **Monitoring:** Prometheus + node-exporter + kube-state-metrics + Grafana (via the `kube-prometheus-stack` Helm chart).
* **Image Scanning & Supply-Chain:** Trivy (Trivy Operator) for automated in-cluster image vulnerability scanning.
* **Policy / Admission Control / Pod Security:** Kyverno for policy enforcement and Kubernetes Pod Security Admission (PSA) for baseline security.
* **Runtime Security / IDS:** Falco to detect suspicious syscalls and pod activity.
* **Network Segmentation:** Calico (or Cilium) CNI with basic NetworkPolicy configuration.
* **Ad-Hoc Checks:** kube-bench (CIS benchmarks), kube-linter/kube-score (static analysis), and kube-hunter (penetration testing).
## Prerequisites
* A functional Kubernetes cluster (managed or self-hosted).
* `kubectl` installed and configured to connect to your cluster.
* Helm v3 installed.
## Installation
These instructions assume you have `kubectl` and Helm set up and authenticated to your cluster.
### 1. Monitoring (Prometheus + Grafana)
* Add the Prometheus community Helm repository:
```bash
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
```
* Create the `monitoring` namespace and install the `kube-prometheus-stack` chart:
```bash
kubectl create ns monitoring
helm install kube-prometheus prometheus-community/kube-prometheus-stack --namespace monitoring
```
*Optional*: Customize the installation by creating a `values.yaml` file to configure persistence, resource limits, and scrape intervals. See *Configuration* below for a potential `values.yaml` you can adapt.
* Access Grafana:
```bash
kubectl -n monitoring port-forward svc/kube-prometheus-grafana 3000:80
```
Open `http://localhost:3000` in your browser. The default `admin` user password can be found in the chart's secrets (check the Helm chart documentation).
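If you'd rather not dig through the docs, the Grafana subchart typically stores the admin password in a Secret named `<release>-grafana` (so `kube-prometheus-grafana` here); you can read it directly:
```bash
kubectl -n monitoring get secret kube-prometheus-grafana \
  -o jsonpath='{.data.admin-password}' | base64 -d; echo
```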
This provides node-exporter, kube-state-metrics, a Prometheus server, Alertmanager, and pre-built dashboards for your cluster.
### 2. Image Scanning (Trivy Operator)
* Add the Aqua Security Helm repository:
```bash
helm repo add aqua https://aquasecurity.github.io/helm-charts
helm repo update
```
* Create the `trivy-system` namespace and install the `trivy-operator` chart:
```bash
kubectl create ns trivy-system
helm install trivy-operator aqua/trivy-operator --namespace trivy-system
```
Trivy Operator creates `VulnerabilityReport` and `ConfigAuditReport` CRDs. It scans images running in the cluster for vulnerabilities.
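Once the first scans have run, you can browse the generated reports with plain `kubectl`:
```bash
# One vulnerability summary per workload container image
kubectl get vulnerabilityreports -A
# Full findings for a given namespace
kubectl get vulnerabilityreports -n default -o yaml
# Configuration audit results
kubectl get configauditreports -A
```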
### 3. Policy Admission (Kyverno)
* Create the `kyverno` namespace and install Kyverno:
```bash
kubectl create ns kyverno
kubectl apply -f https://github.com/kyverno/kyverno/releases/latest/download/install.yaml
```
* Apply the example `ClusterPolicy` to deny privileged containers and hostPath mounts:
```yaml
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
  name: deny-privileged-and-hostpath
spec:
  validationFailureAction: Enforce
  rules:
    - name: deny-privileged
      match:
        any:
          - resources:
              kinds: ["Pod"] # Kyverno auto-generates matching rules for Deployments, StatefulSets, Jobs, etc.
      validate:
        message: "Privileged containers are not allowed"
        pattern:
          spec:
            containers:
              - =(securityContext):
                  =(privileged): "false"
    - name: deny-hostpath
      match:
        any:
          - resources:
              kinds: ["Pod"]
      validate:
        message: "hostPath volumes are not allowed"
        pattern:
          spec:
            =(volumes):
              - X(hostPath): "null"
```
Save the above as `kyverno-policy.yaml` and apply it:
```bash
kubectl apply -f kyverno-policy.yaml
```
Adapt the `match` section to target specific workload types. See *Example Kyverno Policy* below.
### 4. Pod Security Admission (PSA)
* Apply the `baseline` Pod Security Standard to the `default` namespace:
```bash
kubectl label ns default pod-security.kubernetes.io/enforce=baseline
```
* For a stricter security posture, use the `restricted` profile:
```bash
kubectl label ns default pod-security.kubernetes.io/enforce=restricted
```
PSA provides controls like preventing privileged containers and restricting host networking.
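PSA also supports `audit` and `warn` modes alongside `enforce`, which lets you trial the `restricted` profile before enforcing it:
```bash
# Enforce baseline, but surface what would break under restricted
kubectl label ns default \
  pod-security.kubernetes.io/enforce=baseline \
  pod-security.kubernetes.io/warn=restricted \
  pod-security.kubernetes.io/audit=restricted --overwrite
```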
### 5. Runtime Detection (Falco)
* Add the Falco Helm repository:
```bash
helm repo add falcosecurity https://falcosecurity.github.io/charts
helm repo update
```
* Create the `falco` namespace and install the `falco` chart:
```bash
kubectl create ns falco
helm install falco falcosecurity/falco --namespace falco
```
Falco detects suspicious container behavior and system calls.
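A quick way to see it working: spawning a shell inside any running container trips the stock "Terminal shell in container" rule. Tail the Falco logs while you do it (the label assumes the chart's defaults; `deploy/my-app` is a placeholder for any workload you have running):
```bash
# In one terminal: watch Falco events
kubectl -n falco logs -l app.kubernetes.io/name=falco -f
# In another: trigger a detection
kubectl exec -it deploy/my-app -- /bin/sh
```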
### 6. Network Policy & CNI
* If you haven't already, install a CNI that supports NetworkPolicy, such as Calico:
```bash
kubectl apply -f https://docs.projectcalico.org/manifests/calico.yaml
```
Alternatively, consider Cilium.
* Implement a default-deny NetworkPolicy:
```yaml
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: default-deny
  namespace: my-namespace
spec:
  podSelector: {}
  policyTypes:
    - Ingress
    - Egress
```
Save the above as `default-deny.yaml` and apply it to your namespace:
```bash
kubectl apply -f default-deny.yaml
```
Follow this up with explicit `allow` policies for necessary services.
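For example, a companion policy that permits traffic within the namespace plus DNS egress to `kube-system` (the namespace and port values here are illustrative; adjust to your services):
```yaml
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: allow-same-namespace-and-dns
  namespace: my-namespace
spec:
  podSelector: {}
  policyTypes:
    - Ingress
    - Egress
  ingress:
    - from:
        - podSelector: {} # any pod in the same namespace
  egress:
    - to:
        - podSelector: {} # same-namespace traffic
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
      ports:
        - protocol: UDP
          port: 53
        - protocol: TCP
          port: 53
```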
### 7. Cluster Hardening & Scans
* **kube-bench (CIS Benchmarks):**
```bash
kubectl run kube-bench --rm -it --restart=Never --image=aquasec/kube-bench:latest -- --version 1.23
```
Refer to the kube-bench documentation for running as a Job or Pod.
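At the time of writing, the kube-bench repository also ships a ready-made Job manifest you can apply directly (double-check the path against the current README):
```bash
kubectl apply -f https://raw.githubusercontent.com/aquasecurity/kube-bench/main/job.yaml
kubectl logs job/kube-bench
```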
* **kube-linter / kube-score (Static Manifest Checks):**
Install the CLI tool locally and analyze your Kubernetes manifests.
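A typical local run looks like this (kube-linter shown; kube-score works similarly):
```bash
# Install via Homebrew; the project README lists other options
brew install kube-linter
# Lint a directory of rendered manifests or a Helm chart
kube-linter lint ./manifests/
```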
* **kube-hunter (Penetration Testing):**
```bash
docker run aquasec/kube-hunter:latest --remote <K8S_API_ENDPOINT>
```
## Configuration
This section provides example configuration files and tips to customize the setup for a home Kubernetes cluster.
### Example `values.yaml` for `kube-prometheus-stack`
This trims the stack for a home cluster: it disables Alertmanager (alerts are forwarded to Telegram instead; see below), disables default dashboards you might not need initially, and cuts Prometheus retention down to a week.
```yaml
# values.yaml for kube-prometheus-stack
prometheus:
  prometheusSpec:
    # Reduce resource requests / limits
    resources:
      requests:
        memory: 1Gi
        cpu: 200m
      limits:
        memory: 2Gi
        cpu: 500m
    # Reduce storage retention
    retention: 7d
    storageSpec:
      volumeClaimTemplate:
        spec:
          storageClassName: "local-path" # Or your storage class
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 10Gi # adjust as needed
alertmanager:
  enabled: false # for a quick home setup, forward alerts to Telegram instead (see below)
grafana:
  enabled: true
  defaultDashboardsEnabled: false # Disable default dashboards
  sidecar:
    dashboards:
      enabled: true # Load custom dashboards from ConfigMaps
kubeStateMetrics:
  enabled: true
nodeExporter:
  enabled: true
```
To use this configuration, save it as `values.yaml` and run:
```bash
helm install kube-prometheus prometheus-community/kube-prometheus-stack --namespace monitoring -f values.yaml
```
Adapt the `storageClassName` and storage amounts to your environment.
### Example Kyverno Policy - Disallow Root User / Require Distroless
This example expands on the previous policy. It requires containers to run as a non-zero UID, forbids privilege escalation, and flags (without rejecting) images that don't appear to be distroless:
```yaml
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
  name: require-non-root-user
  annotations:
    policies.kyverno.io/title: Require Non-Root User
    policies.kyverno.io/category: Security
    policies.kyverno.io/severity: medium
    policies.kyverno.io/subject: Pod
    policies.kyverno.io/description: >-
      Containers should not run as root. This policy requires that containers
      define a non-zero `runAsUser` and set `allowPrivilegeEscalation` to
      false.
spec:
  validationFailureAction: Enforce
  rules:
    - name: check-runasnonroot
      match:
        any:
          - resources:
              kinds:
                - Pod
      validate:
        message: "Containers must not run as root. Specify a non-zero runAsUser in securityContext."
        pattern:
          spec:
            containers:
              - securityContext:
                  runAsUser: ">0" # any non-zero UID
    - name: check-allowprivilegeescalation
      match:
        any:
          - resources:
              kinds:
                - Pod
      validate:
        message: "Containers must set allowPrivilegeEscalation to false."
        pattern:
          spec:
            containers:
              - securityContext:
                  allowPrivilegeEscalation: "false"
---
# Audit-only companion policy: reports, but does not block, images that
# don't look distroless.
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
  name: warn-non-distroless
  annotations:
    policies.kyverno.io/title: Warn on Non-Distroless Images
    policies.kyverno.io/category: Security
    policies.kyverno.io/severity: low
    policies.kyverno.io/subject: Pod
spec:
  validationFailureAction: Audit # report violations without rejecting
  rules:
    - name: warn-distroless
      match:
        any:
          - resources:
              kinds:
                - Pod
      validate:
        message: "Image doesn't appear to be distroless -- see https://github.com/GoogleContainerTools/distroless"
        pattern:
          spec:
            containers:
              - image: "*distroless*" # adjust to your preferred base images
```
### Alertmanager to Telegram
1. **Create a Telegram Bot:** Search for `@BotFather` on Telegram. Use the `/newbot` command. Give your bot a name and a unique username. BotFather will give you the bot's API token.
2. **Get your Telegram Chat ID:** Send a message to your bot. Then, in a browser, go to `https://api.telegram.org/bot<YOUR_BOT_API_TOKEN>/getUpdates` (replace `<YOUR_BOT_API_TOKEN>`). The `chat.id` value in the JSON response is your chat ID.
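If you have `jq` installed, you can pull the chat ID straight out of the response (message your bot first so `getUpdates` has something to return):
```bash
curl -s "https://api.telegram.org/bot<YOUR_BOT_API_TOKEN>/getUpdates" \
  | jq '.result[].message.chat.id'
```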
3. **Create a Secret in Kubernetes:**
```bash
kubectl create secret generic telegram-secrets \
--from-literal=bot_token="<YOUR_BOT_API_TOKEN>" \
--from-literal=chat_id="<YOUR_CHAT_ID>"
```
Replace the placeholders with the correct values.
4. **Add Alertmanager Configuration:**
You would normally patch the Alertmanager configuration shipped with `kube-prometheus-stack`. Because we disabled Alertmanager for simplicity's sake, this setup instead defines a PrometheusRule for the alert itself and runs a small webhook forwarder Deployment that relays alerts to Telegram.
Example:
```yaml
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    release: kube-prometheus # must match the stack's ruleSelector; adjust to your release name
  name: prometheus-to-telegram
  namespace: monitoring
spec:
  groups:
    - name: kubernetes-home-cluster
      rules:
        - alert: PrometheusToTelegramAlert
          expr: vector(1) # always firing; handy for verifying delivery end to end
          for: 1s
          labels:
            severity: critical
          annotations:
            description: 'Test alert from Prometheus, delivered to Telegram'
```
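A PrometheusRule only defines *when* alerts fire; Prometheus still needs a destination for them. With Alertmanager disabled, one option is to point Prometheus at the forwarder below through the stack's `additionalAlertManagerConfigs` value. This is a sketch: it assumes you also expose the forwarder Deployment with a ClusterIP Service named `prometheus-telegram` on port 8080, and Prometheus will POST Alertmanager-format JSON to `/api/v2/alerts` on that target.
```yaml
# addition to values.yaml for kube-prometheus-stack
prometheus:
  prometheusSpec:
    additionalAlertManagerConfigs:
      - static_configs:
          - targets:
              - prometheus-telegram.monitoring.svc:8080 # assumed Service name
```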
Now create the Deployment that runs the small webhook forwarder relaying these alerts to Telegram:
```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-telegram
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus-telegram
  template:
    metadata:
      labels:
        app: prometheus-telegram
    spec:
      containers:
        - name: telegram-forwarder
          image: alpine/curl # busybox nc is included in the alpine base
          ports:
            - containerPort: 8080
          command: ["/bin/sh", "-c"]
          args:
            - |
              # Accept one HTTP POST at a time and relay its body to the
              # Telegram Bot API. This is a toy relay: nc never answers with a
              # proper HTTP response, so the sender will report errors even
              # when the message goes through.
              while true; do
                BODY="$(nc -l -p 8080 | tail -n 1)"
                curl -sS --fail -X POST \
                  "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
                  -d chat_id="${TELEGRAM_CHAT_ID}" \
                  --data-urlencode "text=Alert from Prometheus: ${BODY}" || true
                sleep 1
              done
          env:
            - name: TELEGRAM_BOT_TOKEN
              valueFrom:
                secretKeyRef:
                  name: telegram-secrets
                  key: bot_token
            - name: TELEGRAM_CHAT_ID
              valueFrom:
                secretKeyRef:
                  name: telegram-secrets
                  key: chat_id
```
**Explanation:**
* The `telegram-forwarder` container uses busybox `nc` to accept the HTTP POST from Prometheus and `curl` to relay the body to the Telegram API, authenticating with the credentials from the `telegram-secrets` Secret.
* Because `nc` never returns a proper HTTP response, Prometheus will log send errors even when messages arrive. That's acceptable for a home lab; a purpose-built Alertmanager-to-Telegram bridge is the cleaner long-term option.
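To smoke-test the forwarder without waiting for Prometheus, port-forward to the Deployment and POST something by hand (you should receive it in Telegram):
```bash
kubectl -n monitoring port-forward deploy/prometheus-telegram 8080:8080 &
curl -s -X POST --data 'hello from the cluster' http://localhost:8080/message
```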
## Operational Tips
* **Resource Management:** Set resource limits and requests for components, especially Prometheus and Grafana. Adjust scrape intervals for Prometheus to reduce load.
* **Persistence:** Use persistent volumes for Grafana and Prometheus to preserve dashboards and historical data.
* **Alerting:** Configure Alertmanager with a Telegram or Discord webhook for notifications. This is *simpler* than email for home setups.
* **Trivy & Image Blocking:** To automatically block vulnerable images, integrate Trivy with admission webhooks (using Kyverno to reject deployments based on Trivy reports).
* **Backups:** Regularly back up etcd (if self-hosting the control plane) and potentially Prometheus/Grafana data.
## Getting Started Quickly
Follow this installation order:
1. Install your `CNI`.
2. Install `kube-prometheus-stack`, using `values.yaml` to reduce resources.
3. Log in to Grafana (installed as part of the stack) and import dashboards.
4. Enable PSA on namespaces.
5. Install Kyverno and create deny policies.
6. Install Trivy Operator for image scanning visibility.
7. Install Falco for runtime detection.
8. Run `kube-bench` and `kube-linter` for initial assessment.
## Useful Resources
* [kube-prometheus-stack (Helm)](https://github.com/prometheus-community/helm-charts)
* [trivy-operator](https://github.com/aquasecurity/trivy-operator)
* [Kyverno](https://kyverno.io/)
* [Falco](https://falco.org/)
* [Calico CNI](https://www.tigera.io/project-calico/)
* [Aqua kube-hunter, kube-bench, kube-linter](https://www.aquasec.com/)
This README provides a solid foundation for setting up monitoring and security on your home Kubernetes cluster. Adapt the configurations and policies to your specific needs and experiment!

Some files were not shown because too many files have changed in this diff.