Kubernetes Deployment

Overview

Neurenix provides native Kubernetes integration for deploying, scaling, and managing ML models in production. The framework includes support for:

Deployments: Scalable model serving with rolling updates
Pods: Individual container instances
Services: Load balancing and service discovery
ConfigMaps & Secrets: Configuration and credential management
Jobs: Batch inference and training

Prerequisites

A Kubernetes cluster (version 1.19 or later)
kubectl installed and configured to access that cluster
Container images built and pushed to a registry the cluster can pull from

# Verify kubectl
kubectl version --client

Quick Start

Deploy a Model

from neurenix.kubernetes import Deployment, DeploymentConfig

# Create deployment configuration
config = DeploymentConfig(
    name="neurenix-model",
    image="myregistry.com/neurenix-model:latest",
    replicas=3,
    namespace="default",
    ports=[{"containerPort": 8000, "protocol": "TCP"}],
    env={
        "MODEL_PATH": "/app/model.nx",
        "DEVICE": "cpu"
    },
    resources={
        "requests": {"cpu": "500m", "memory": "1Gi"},
        "limits": {"cpu": "2", "memory": "4Gi"}
    }
)

# Create and deploy
deployment = Deployment(name="neurenix-model", namespace="default")
deployment.create(config)

print("Deployment created successfully")

Expose via Service

from neurenix.kubernetes import Service, ServiceConfig

service_config = ServiceConfig(
    name="neurenix-model-service",
    namespace="default",
    selector={"app": "neurenix-model"},
    ports=[{"port": 80, "targetPort": 8000, "protocol": "TCP"}],
    type="LoadBalancer"
)

service = Service(name="neurenix-model-service", namespace="default")
service.create(service_config)

print("Service created successfully")

Deployments

DeploymentConfig

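A DeploymentConfig that sets labels, annotations, environment variables, ports, resource requests and limits, liveness and readiness probes, and the rolling-update strategy: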

from neurenix.kubernetes import DeploymentConfig

config = DeploymentConfig(
    name="ml-inference",
    image="neurenix-model:v1.0",
    replicas=5,
    namespace="production",
    labels={
        "app": "ml-inference",
        "version": "v1.0",
        "component": "model-serving"
    },
    annotations={
        "prometheus.io/scrape": "true",
        "prometheus.io/port": "8000"
    },
    env={
        "MODEL_PATH": "/models/model.nx",
        "LOG_LEVEL": "info",
        "WORKERS": "4"
    },
    ports=[{"containerPort": 8000, "name": "http"}],
    resources={
        "requests": {
            "cpu": "1",
            "memory": "2Gi"
        },
        "limits": {
            "cpu": "4",
            "memory": "8Gi"
        }
    },
    liveness_probe={
        "httpGet": {
            "path": "/health",
            "port": 8000
        },
        "initialDelaySeconds": 30,
        "periodSeconds": 10
    },
    readiness_probe={
        "httpGet": {
            "path": "/ready",
            "port": 8000
        },
        "initialDelaySeconds": 10,
        "periodSeconds": 5
    },
    strategy={
        "type": "RollingUpdate",
        "rollingUpdate": {
            "maxSurge": 1,
            "maxUnavailable": 0
        }
    }
)

Deployment Operations

from neurenix.kubernetes import Deployment

deployment = Deployment(name="ml-inference", namespace="production")

# Create deployment
deployment.create(config)

# Check if exists
if deployment.exists():
    print("Deployment is running")

# Get deployment info
info = deployment.get()
print(f"Replicas: {info['spec']['replicas']}")

# Scale deployment
deployment.scale(replicas=10)

# Restart deployment (rolling restart)
deployment.restart()

# Get deployment status
status = deployment.status()
print(f"Available replicas: {status.get('availableReplicas', 0)}")

# View logs
logs = deployment.logs(tail=100)
print(logs)

# Execute command in deployment
output = deployment.exec(["curl", "http://localhost:8000/health"])
print(output)

# Update image
deployment.update_image("neurenix-model:v2.0")

# Delete deployment
deployment.delete(wait=True)

Neurenix-Specific Deployment

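create_neurenix_deployment creates a deployment from a few common settings (image, model path, replicas, GPU flag, memory, CPU, port, and environment variables) without building a full DeploymentConfig: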

deployment = Deployment(name="neurenix-prod", namespace="ml-models")

deployment.create_neurenix_deployment(
    image="neurenix-model:latest",
    model_path="/models/classifier.nx",
    replicas=3,
    gpu=False,
    memory="4Gi",
    cpu="2",
    port=8000,
    env={
        "BATCH_SIZE": "32",
        "TIMEOUT": "30"
    }
)

GPU Deployments

config = DeploymentConfig(
    name="gpu-inference",
    image="neurenix-model:cuda",
    replicas=2,
    namespace="gpu-workloads",
    resources={
        "requests": {
            "cpu": "4",
            "memory": "16Gi",
            "nvidia.com/gpu": "1"
        },
        "limits": {
            "cpu": "8",
            "memory": "32Gi",
            "nvidia.com/gpu": "1"
        }
    },
    node_selector={
        "accelerator": "nvidia-tesla-v100"
    },
    tolerations=[
        {
            "key": "nvidia.com/gpu",
            "operator": "Exists",
            "effect": "NoSchedule"
        }
    ]
)

Pods

PodConfig

from neurenix.kubernetes import PodConfig

config = PodConfig(
    name="inference-pod",
    image="neurenix-model:latest",
    namespace="default",
    labels={"app": "inference"},
    env={"MODEL_PATH": "/app/model.nx"},
    ports=[{"containerPort": 8000}],
    resources={
        "requests": {"cpu": "1", "memory": "2Gi"},
        "limits": {"cpu": "2", "memory": "4Gi"}
    },
    restart_policy="Always",
    volumes=[
        {
            "name": "model-storage",
            "persistentVolumeClaim": {"claimName": "model-pvc"}
        }
    ],
    volume_mounts=[
        {"name": "model-storage", "mountPath": "/app/models"}
    ]
)

Pod Operations

from neurenix.kubernetes import Pod

pod = Pod(name="inference-pod", namespace="default")

# Create pod
pod.create(config)

# Check status
status = pod.status()
print(f"Pod status: {status}")

# Get pod info
info = pod.get()
print(f"IP: {info['status']['podIP']}")

# View logs
logs = pod.logs(tail=50)
print(logs)

# Follow logs
logs = pod.logs(follow=True)

# Execute command
output = pod.exec(["ls", "-la", "/app"])
print(output)

# Port forwarding
port_forward_process = pod.port_forward(local_port=8080, remote_port=8000)
print("Access at http://localhost:8080")
# ... use the service ...
port_forward_process.terminate()

# Copy files
pod.copy_to("./model.nx", "/app/model.nx")
pod.copy_from("/app/output.json", "./output.json")

# Delete pod
pod.delete(force=True)

Create Neurenix Pod

pod = Pod(name="neurenix-worker", namespace="ml-jobs")

pod.create_neurenix_pod(
    image="neurenix-model:latest",
    model_path="/models/model.nx",
    gpu=True,
    memory="8Gi",
    cpu="4",
    port=8000,
    env={"DEVICE": "cuda"},
    command=["python"],
    args=["inference.py"]
)

Services

ServiceConfig

from neurenix.kubernetes import ServiceConfig

# ClusterIP (internal)
cluster_config = ServiceConfig(
    name="internal-service",
    namespace="default",
    selector={"app": "neurenix-model"},
    ports=[{"port": 80, "targetPort": 8000}],
    type="ClusterIP"
)

# NodePort (external access via node IP)
node_config = ServiceConfig(
    name="nodeport-service",
    namespace="default",
    selector={"app": "neurenix-model"},
    ports=[{
        "port": 80,
        "targetPort": 8000,
        "nodePort": 30080
    }],
    type="NodePort"
)

# LoadBalancer (cloud provider LB)
lb_config = ServiceConfig(
    name="lb-service",
    namespace="default",
    selector={"app": "neurenix-model"},
    ports=[{"port": 80, "targetPort": 8000}],
    type="LoadBalancer",
    external_traffic_policy="Local"
)

Service Operations

from neurenix.kubernetes import Service

service = Service(name="neurenix-service", namespace="default")

# Create service
service.create(lb_config)

# Check if exists
if service.exists():
    print("Service is running")

# Get service info
info = service.get()
print(f"Type: {info['spec']['type']}")

# Get external IP (LoadBalancer)
external_ip = service.get_external_ip()
if external_ip:
    print(f"Access at http://{external_ip}")

# Get cluster IP
cluster_ip = service.get_cluster_ip()
print(f"Internal IP: {cluster_ip}")

# Get node port
node_port = service.get_node_port(port=80)
if node_port:
    print(f"NodePort: {node_port}")

# Get endpoints
endpoints = service.get_endpoints()
print(f"Endpoints: {endpoints}")

# Port forward
port_forward = service.port_forward(local_port=8080, remote_port=80)
print("Forwarded to localhost:8080")

# Delete service
service.delete()

Create Neurenix Service

service = Service(name="neurenix-api", namespace="production")

service.create_neurenix_service(
    port=80,
    target_port=8000,
    type="LoadBalancer",
    selector={"app": "neurenix-model", "version": "v1"},
    external_traffic_policy="Local"
)

Complete Production Deployment

from neurenix.kubernetes import (
    Deployment, DeploymentConfig,
    Service, ServiceConfig,
    ConfigMap, Secret
)

# 1. Create ConfigMap for configuration
config_map = ConfigMap(
    name="model-config",
    namespace="production",
    data={
        "model.conf": "batch_size=32\ntimeout=30",
        "logging.conf": "level=info\nformat=json"
    }
)

# 2. Create Secret for credentials
secret = Secret(
    name="model-secrets",
    namespace="production",
    data={
        "api-key": "base64-encoded-key",
        "db-password": "base64-encoded-password"
    }
)

# 3. Create Deployment
deployment_config = DeploymentConfig(
    name="neurenix-production",
    image="myregistry.com/neurenix-model:v2.0",
    replicas=5,
    namespace="production",
    labels={
        "app": "neurenix",
        "version": "v2.0",
        "tier": "api"
    },
    env={
        "MODEL_PATH": "/models/model.nx",
        "CONFIG_PATH": "/etc/config"
    },
    env_from=[
        {"configMapRef": {"name": "model-config"}},
        {"secretRef": {"name": "model-secrets"}}
    ],
    ports=[{"containerPort": 8000, "name": "http"}],
    resources={
        "requests": {"cpu": "2", "memory": "4Gi"},
        "limits": {"cpu": "4", "memory": "8Gi"}
    },
    liveness_probe={
        "httpGet": {"path": "/health", "port": 8000},
        "initialDelaySeconds": 30,
        "periodSeconds": 10,
        "timeoutSeconds": 5,
        "failureThreshold": 3
    },
    readiness_probe={
        "httpGet": {"path": "/ready", "port": 8000},
        "initialDelaySeconds": 10,
        "periodSeconds": 5
    },
    strategy={
        "type": "RollingUpdate",
        "rollingUpdate": {
            "maxSurge": 1,
            "maxUnavailable": 0
        }
    }
)

deployment = Deployment(name="neurenix-production", namespace="production")
deployment.create(deployment_config)

# 4. Create Service
service_config = ServiceConfig(
    name="neurenix-api",
    namespace="production",
    selector={"app": "neurenix"},
    ports=[{"port": 80, "targetPort": 8000}],
    type="LoadBalancer",
    annotations={
        "service.beta.kubernetes.io/aws-load-balancer-type": "nlb"
    }
)

service = Service(name="neurenix-api", namespace="production")
service.create(service_config)

print("Production deployment complete!")
print(f"External IP: {service.get_external_ip()}")

YAML Export

Configuration objects can be serialized with to_yaml(), written to files, and then applied with kubectl:

# Export deployment YAML
yaml_content = deployment_config.to_yaml()
with open("deployment.yaml", "w") as f:
    f.write(yaml_content)

# Export service YAML
service_yaml = service_config.to_yaml()
with open("service.yaml", "w") as f:
    f.write(service_yaml)

# Apply with kubectl
import subprocess
subprocess.run(["kubectl", "apply", "-f", "deployment.yaml"])
subprocess.run(["kubectl", "apply", "-f", "service.yaml"])

Best Practices

Resource Limits: Always set CPU and memory limits to prevent resource exhaustion
Health Checks: Implement liveness and readiness probes for reliability
Rolling Updates: Use rolling updates with maxUnavailable=0 for zero-downtime deployments
Horizontal Pod Autoscaling: Configure HPA for automatic scaling based on metrics
Pod Disruption Budgets: Protect availability during cluster maintenance
Namespaces: Use separate namespaces for different environments
Labels and Selectors: Use consistent labeling for service discovery and monitoring
Secrets Management: Use Kubernetes secrets or external secret managers
Monitoring: Integrate with Prometheus and Grafana for observability
Logging: Use structured logging with centralized log aggregation

Troubleshooting

Check kubectl Installation

try:
    deployment = Deployment("test", "default")
except RuntimeError as e:
    print(f"kubectl error: {e}")
    # Install kubectl or configure kubeconfig

Debug Deployment Issues

# Check deployment status
status = deployment.status()
print(f"Desired replicas: {status.get('replicas')}")
print(f"Available replicas: {status.get('availableReplicas')}")

# View logs
logs = deployment.logs(tail=100)
print(logs)

# Describe deployment (using kubectl)
import subprocess
subprocess.run(["kubectl", "describe", "deployment", "neurenix-model"])

# Check events
subprocess.run(["kubectl", "get", "events", "--sort-by=.metadata.creationTimestamp"])

On this page

Overview

Prerequisites

Quick Start

Deploy a Model

Expose via Service

Deployments

DeploymentConfig

Deployment Operations

Neurenix-Specific Deployment

GPU Deployments

Pods

PodConfig

Pod Operations

Create Neurenix Pod

Services

ServiceConfig

Service Operations

Create Neurenix Service

Complete Production Deployment

YAML Export

Best Practices

Troubleshooting

Check kubectl Installation

Debug Deployment Issues

Next Steps