#!/usr/bin/env python3
"""
Sync data from a Kubernetes node host path to a Kubernetes PVC.

This script creates a temporary pod that mounts both the hostPath (from the K8s node)
and the PVC, copies data from the host path to the PVC, and then cleans up the pod.

The host_path should be a path on the Kubernetes node's filesystem, not on the local
machine running this script.

WARNING: the existing contents of the PVC are deleted before the copy starts.

Usage:
    python3 sync_pvc_from_host.py <pvc_name> <namespace> <host_path> [--node NODE] [--verify] [--dry-run]

Examples:
    python3 sync_pvc_from_host.py immich-postgres-data prod /data/volumes/prod/immich-postgres-data/
    python3 sync_pvc_from_host.py my-pvc default /backup/data --verify
    python3 sync_pvc_from_host.py my-pvc default /backup/data --dry-run
"""

# Provenance: this script was introduced as scripts/sync_pvc_from_host.py by
# commit c50ce95086fc0474c40c8c12abd8e83767e7cedf ("start immich", Morten Olsen,
# 2026-01-02). The same commit changed the immich chart templates: `replicas`
# went from 0 to 1 on the postgres (database.yaml) and server (deployment.yaml)
# Deployments, and `revisionHistoryLimit: 0` was removed from the postgres,
# server and ml Deployments.

import argparse
import json
import os
import shutil
import subprocess
import sys
import tempfile
import time

# Name of the single container in the temporary pod.
SYNC_CONTAINER = "sync"
# Mount points inside the temporary pod.
HOST_MOUNT = "/host-data"
PVC_MOUNT = "/pvc-data"
# Pod names may be longer, but staying within a DNS label (63 characters)
# keeps the name usable everywhere a label-length name is required.
MAX_POD_NAME_LENGTH = 63


def run_kubectl(cmd, check=True, capture_output=True):
    """Run a kubectl command.

    Args:
        cmd: List of arguments passed to ``kubectl`` (without the binary name).
        check: When true, a non-zero exit status raises ``CalledProcessError``.
        capture_output: When true, stdout/stderr are captured instead of
            being inherited from this process.

    Returns:
        The stripped stdout of the command when ``capture_output`` is true,
        otherwise ``None``.

    Raises:
        subprocess.CalledProcessError: If ``check`` is true and kubectl exits
            with a non-zero status. The captured output is printed to stderr
            before the exception is re-raised.
    """
    try:
        result = subprocess.run(
            ["kubectl", *cmd],
            capture_output=capture_output,
            text=True,
            check=check,
        )
    except subprocess.CalledProcessError as e:
        print(f"Error running kubectl {' '.join(cmd)}", file=sys.stderr)
        if capture_output:
            print(f"stdout: {e.stdout}", file=sys.stderr)
            print(f"stderr: {e.stderr}", file=sys.stderr)
        raise
    return result.stdout.strip() if capture_output else None


def _exec_in_pod(pod_name, namespace, shell_command):
    """Run ``sh -c <shell_command>`` in the sync container and return its stdout.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    return run_kubectl(
        [
            "exec", "-n", namespace, pod_name, "-c", SYNC_CONTAINER, "--",
            "sh", "-c", shell_command,
        ],
        check=True,
    )


def normalize_host_path(host_path):
    """Return ``host_path`` without trailing slashes.

    The path is not checked locally because it lives on the Kubernetes node.

    Raises:
        ValueError: If the path is not absolute (a hostPath volume needs an
            absolute path on the node).
    """
    if not host_path.startswith("/"):
        raise ValueError(f"host path must be absolute, got '{host_path}'")
    # rstrip turns "/" into "", which would produce an invalid manifest.
    return host_path.rstrip("/") or "/"


def make_pod_name(pvc_name, timestamp):
    """Build the temporary pod name ``pvc-sync-<pvc_name>-<timestamp>``.

    The PVC part is truncated so the whole name stays within
    ``MAX_POD_NAME_LENGTH`` characters.
    """
    prefix = "pvc-sync-"
    suffix = f"-{timestamp}"
    budget = max(MAX_POD_NAME_LENGTH - len(prefix) - len(suffix), 0)
    # A truncated name must not leave a dangling separator before the suffix.
    middle = pvc_name[:budget].rstrip("-.")
    return f"{prefix}{middle}{suffix}"


def build_pod_manifest(pvc_name, namespace, pod_name, host_path, node_name=None):
    """Return the temporary pod manifest as a JSON-serialisable dict.

    The pod mounts ``host_path`` read-only at ``/host-data`` and the PVC at
    ``/pvc-data``, and sleeps for an hour so commands can be exec'd into it.

    Args:
        pvc_name: Name of the PVC to mount.
        namespace: Namespace for the pod (must be the PVC's namespace).
        pod_name: Name for the temporary pod.
        host_path: Absolute path on the node to mount as the source.
        node_name: Optional node hostname; when given, the pod is constrained
            to that node through a ``kubernetes.io/hostname`` node selector.
    """
    spec = {
        "containers": [
            {
                "name": SYNC_CONTAINER,
                "image": "busybox:latest",
                "command": ["sleep", "3600"],
                # NOTE(review): kept from the original manifest; privileged
                # mode may be broader than a plain file copy needs.
                "securityContext": {"privileged": True},
                "volumeMounts": [
                    {"name": "host-data", "mountPath": HOST_MOUNT, "readOnly": True},
                    {"name": "pvc", "mountPath": PVC_MOUNT},
                ],
            }
        ],
        "volumes": [
            {
                "name": "host-data",
                # "Directory" (not "DirectoryOrCreate"): a missing or mistyped
                # source must fail the pod instead of being created empty and
                # then "synced" over the PVC after the PVC has been wiped.
                "hostPath": {"path": host_path, "type": "Directory"},
            },
            {"name": "pvc", "persistentVolumeClaim": {"claimName": pvc_name}},
        ],
        "restartPolicy": "Never",
    }
    if node_name:
        # A node selector still goes through the scheduler, unlike
        # spec.nodeName, so normal volume scheduling constraints apply.
        spec["nodeSelector"] = {"kubernetes.io/hostname": node_name}
    return {
        "apiVersion": "v1",
        "kind": "Pod",
        "metadata": {"name": pod_name, "namespace": namespace},
        "spec": spec,
    }


def check_pvc_exists(pvc_name, namespace):
    """Return True if ``kubectl get pvc`` succeeds for the given PVC."""
    try:
        run_kubectl(["get", "pvc", pvc_name, "-n", namespace], check=True)
    except subprocess.CalledProcessError:
        return False
    return True


def check_host_path_exists_on_node(host_path, pod_name, namespace):
    """Return True if the host path is mounted as a directory in the pod.

    Args:
        host_path: Path on the node; unused here because the pod always
            mounts it at ``/host-data``. Kept for interface compatibility.
        pod_name: Name of the running temporary pod.
        namespace: Namespace of the pod.
    """
    try:
        _exec_in_pod(pod_name, namespace, f"test -d {HOST_MOUNT}")
    except subprocess.CalledProcessError:
        # Non-zero exit: either the directory is missing or exec failed.
        return False
    return True


def create_temp_pod(pvc_name, namespace, pod_name, host_path, node_name=None, timeout=60):
    """Create a temporary pod that mounts both the hostPath and the PVC.

    Args:
        pvc_name: Name of the PVC to mount.
        namespace: Namespace to create the pod in.
        pod_name: Name for the temporary pod.
        host_path: Absolute path on the node to mount read-only.
        node_name: Optional node to constrain the pod to.
        timeout: Seconds to wait for the pod to reach the Running phase.

    Returns:
        True once the pod is Running; False if it failed or timed out. The
        pod may exist in the cluster even when False is returned, so the
        caller is responsible for deleting it.

    Raises:
        subprocess.CalledProcessError: If ``kubectl apply`` fails.
    """
    print(f"Creating temporary pod '{pod_name}'...")

    manifest = build_pod_manifest(pvc_name, namespace, pod_name, host_path, node_name)

    # kubectl accepts JSON manifests; JSON avoids YAML quoting pitfalls for
    # paths or names containing characters such as ':' or '#'.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
        json.dump(manifest, f)
        temp_file = f.name

    try:
        run_kubectl(["apply", "-f", temp_file], check=True)

        print("Waiting for pod to be ready...")
        deadline = time.monotonic() + timeout
        phase = "Unknown"

        while time.monotonic() < deadline:
            try:
                phase = run_kubectl(
                    ["get", "pod", pod_name, "-n", namespace, "-o", "jsonpath={.status.phase}"],
                    check=True,
                )
            except subprocess.CalledProcessError:
                # Treat lookup errors as transient and keep polling.
                phase = "Unknown"

            if phase == "Running":
                print(f"✓ Pod '{pod_name}' is ready")
                return True
            if phase == "Failed":
                print(f"ERROR: Pod '{pod_name}' failed to start", file=sys.stderr)
                return False

            time.sleep(2)

        print(
            f"ERROR: Pod '{pod_name}' did not become ready within {timeout} seconds "
            f"(last phase: {phase or 'Unknown'})",
            file=sys.stderr,
        )
        return False
    finally:
        # Clean up temp file
        try:
            os.unlink(temp_file)
        except OSError:
            pass


def delete_pod(pod_name, namespace):
    """Delete a pod, warning (not failing) if the deletion does not succeed."""
    print(f"Deleting temporary pod '{pod_name}'...")
    try:
        # --ignore-not-found makes cleanup safe when the pod was never created.
        run_kubectl(
            ["delete", "pod", pod_name, "-n", namespace, "--ignore-not-found"],
            check=True,
        )
        print(f"✓ Pod '{pod_name}' deleted")
    except subprocess.CalledProcessError as e:
        print(f"Warning: Failed to delete pod '{pod_name}': {e}", file=sys.stderr)


def _count_files(mount_path, pod_name, namespace):
    """Return the number of regular files below ``mount_path`` in the pod."""
    output = _exec_in_pod(pod_name, namespace, f"find {mount_path} -type f | wc -l")
    return int(output.strip())


def _total_file_bytes(mount_path, pod_name, namespace):
    """Return the summed size in bytes of regular files below ``mount_path``.

    Sizes are summed per file with ``stat`` rather than taken from ``du`` so
    that the result does not include filesystem-dependent directory overhead.
    """
    size_cmd = (
        f"find {mount_path} -type f -exec stat -c %s {{}} + | "
        # printf "%.0f" keeps large totals in plain integer notation.
        "awk '{ total += $1 } END { printf \"%.0f\\n\", total }'"
    )
    output = _exec_in_pod(pod_name, namespace, size_cmd)
    return int(output.strip() or "0")


def sync_data_from_host(host_path, pod_name, namespace):
    """Sync data from host path (mounted in pod) to PVC using tar.

    The existing PVC contents are removed first, then the host data is
    streamed through a tar pipe.

    Args:
        host_path: Path on the node (used for messages only; the pod mounts
            it at ``/host-data``).
        pod_name: Name of the running temporary pod.
        namespace: Namespace of the pod.

    Raises:
        subprocess.CalledProcessError: If the host path is not accessible in
            the pod or the copy fails.
    """
    print(f"Syncing data from host path '{host_path}' (mounted at /host-data) to PVC (mounted at /pvc-data)...")

    # First verify the host path exists in the pod
    print("Verifying host path is accessible in pod...")
    try:
        _exec_in_pod(pod_name, namespace, f"test -d {HOST_MOUNT}")
    except subprocess.CalledProcessError:
        print("ERROR: Host path '/host-data' is not accessible in pod", file=sys.stderr)
        raise
    print("✓ Host path is accessible")

    # Check if host path has data (informational only)
    try:
        file_count = _count_files(HOST_MOUNT, pod_name, namespace)
    except (subprocess.CalledProcessError, ValueError) as e:
        print(f"⚠ Warning: Could not count files in host path: {e}", file=sys.stderr)
    else:
        if file_count == 0:
            print("⚠ Warning: Host path appears to be empty")
        else:
            print(f"Found {file_count} files in host path")

    print("Syncing data using tar (this may take a while)...")

    sync_script = f"""
        # Without pipefail the pipeline status is that of the extracting tar
        # only, so a failure of the archiving tar would go unnoticed. Probe in
        # a subshell first so shells lacking the option do not abort here.
        if (set -o pipefail) 2>/dev/null; then set -o pipefail; fi

        # Clear existing PVC data (globs cover regular and dot-prefixed entries)
        rm -rf {PVC_MOUNT}/* {PVC_MOUNT}/..?* {PVC_MOUNT}/.[!.]* 2>/dev/null || true

        cd {HOST_MOUNT} || exit 1

        # Create tar from host-data and extract to pvc-data in one go
        if tar -cf - . | (cd {PVC_MOUNT} && tar -xf -); then
            echo "Sync completed successfully"
        else
            echo "Sync failed" >&2
            exit 1
        fi
    """

    _exec_in_pod(pod_name, namespace, sync_script)
    print("✓ Data sync completed")


def verify_sync(host_path, pod_name, namespace):
    """Verify the sync by comparing file counts and total file sizes.

    Args:
        host_path: Path on the node (unused; the pod mounts it at
            ``/host-data``). Kept for interface compatibility.
        pod_name: Name of the running temporary pod.
        namespace: Namespace of the pod.

    Returns:
        True when both the number of regular files and their summed sizes
        are identical on the host mount and the PVC mount; False otherwise,
        including when the figures could not be collected.
    """
    print("\nVerifying sync...")

    try:
        host_file_count = _count_files(HOST_MOUNT, pod_name, namespace)
        host_total_size = _total_file_bytes(HOST_MOUNT, pod_name, namespace)
        pvc_file_count = _count_files(PVC_MOUNT, pod_name, namespace)
        pvc_total_size = _total_file_bytes(PVC_MOUNT, pod_name, namespace)
    except (subprocess.CalledProcessError, ValueError) as e:
        print(f"Warning: Could not verify sync: {e}", file=sys.stderr)
        return False

    print(f"Host path: {host_file_count} files, {host_total_size} bytes")
    print(f"PVC: {pvc_file_count} files, {pvc_total_size} bytes")

    counts_match = host_file_count == pvc_file_count
    if counts_match:
        print("✓ File counts match")
    else:
        print(f"⚠ Warning: File counts differ ({host_file_count} vs {pvc_file_count})")

    # Summed per-file sizes carry no filesystem overhead, so they must be equal.
    size_diff = abs(host_total_size - pvc_total_size)
    sizes_match = size_diff == 0
    if sizes_match:
        print("✓ Total sizes match")
    else:
        print(f"⚠ Warning: Size difference: {size_diff} bytes")

    return counts_match and sizes_match


def main():
    """Parse arguments, run the sync in a temporary pod and clean the pod up.

    Exits with status 1 when the PVC is missing, kubectl is unavailable, the
    pod cannot be started, the sync fails, or ``--verify`` finds a mismatch.
    """
    parser = argparse.ArgumentParser(
        description="Sync data from host path to Kubernetes PVC",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s immich-postgres-data prod /data/volumes/prod/immich-postgres-data/
  %(prog)s my-pvc default /backup/data --verify
  %(prog)s my-pvc default /backup/data --dry-run
        """,
    )
    parser.add_argument(
        "pvc_name",
        help="Name of the PVC to sync to",
    )
    parser.add_argument(
        "namespace",
        help="Namespace where the PVC exists",
    )
    parser.add_argument(
        "host_path",
        help="Host path to sync from (source of truth)",
    )
    parser.add_argument(
        "--node",
        default=None,
        help="Hostname of the node that holds the host path (default: let the scheduler choose)",
    )
    parser.add_argument(
        "--verify",
        action="store_true",
        help="Verify sync by comparing file counts and sizes",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be done without actually syncing",
    )

    args = parser.parse_args()

    # Normalize host path (but don't check locally - it's on the K8s node)
    try:
        host_path = normalize_host_path(args.host_path)
    except ValueError as e:
        parser.error(str(e))

    if shutil.which("kubectl") is None:
        print("ERROR: kubectl was not found on PATH", file=sys.stderr)
        sys.exit(1)

    print("=" * 60)
    print("PVC Sync from Kubernetes Node Host Path")
    print("=" * 60)
    print(f"PVC: {args.pvc_name}")
    print(f"Namespace: {args.namespace}")
    print(f"Host Path (on K8s node): {host_path}")
    if args.node:
        print(f"Node: {args.node}")
    print("=" * 60)
    print()

    # Validate inputs
    if not check_pvc_exists(args.pvc_name, args.namespace):
        print(f"ERROR: PVC '{args.pvc_name}' not found in namespace '{args.namespace}'", file=sys.stderr)
        sys.exit(1)
    print(f"✓ PVC '{args.pvc_name}' exists")
    print(f"✓ Will check host path '{host_path}' once pod is created")
    print()

    if args.dry_run:
        print("DRY RUN: Would sync data from Kubernetes node host path to PVC")
        print("Remove --dry-run to perform actual sync")
        sys.exit(0)

    # Generate unique pod name
    pod_name = make_pod_name(args.pvc_name, int(time.time()))

    try:
        # Create temporary pod with both hostPath and PVC mounted
        if not create_temp_pod(args.pvc_name, args.namespace, pod_name, host_path, node_name=args.node):
            sys.exit(1)

        # Sync data
        sync_data_from_host(host_path, pod_name, args.namespace)

        # Verify if requested; a failed verification is a failed run.
        if args.verify and not verify_sync(host_path, pod_name, args.namespace):
            print("\nERROR: Verification failed; PVC contents do not match the host path", file=sys.stderr)
            sys.exit(1)

        print()
        print("=" * 60)
        print("Sync completed successfully!")
        print("=" * 60)

    except KeyboardInterrupt:
        print("\n\nSync interrupted by user.", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"\n\nERROR: {e}", file=sys.stderr)
        sys.exit(1)
    finally:
        # Always attempt cleanup: the pod may exist even when creation was
        # reported as failed (applied but never Running) or was interrupted.
        delete_pod(pod_name, args.namespace)


if __name__ == "__main__":
    main()