Overview
Neurenix provides native Kubernetes integration for deploying, scaling, and managing ML models in production. The framework includes support for:
- Deployments: Scalable model serving with rolling updates
- Pods: Individual container instances
- Services: Load balancing and service discovery
- ConfigMaps & Secrets: Configuration and credential management
- Jobs: Batch inference and training
Prerequisites
- Kubernetes cluster (1.19+)
- kubectl configured
- Docker images built and pushed to a registry
# Verify kubectl
kubectl version --client
Quick Start
Deploy a Model
from neurenix.kubernetes import Deployment, DeploymentConfig
# Create deployment configuration
config = DeploymentConfig(
name="neurenix-model",
image="myregistry.com/neurenix-model:latest",
replicas=3,
namespace="default",
ports=[{"containerPort": 8000, "protocol": "TCP"}],
env={
"MODEL_PATH": "/app/model.nx",
"DEVICE": "cpu"
},
resources={
"requests": {"cpu": "500m", "memory": "1Gi"},
"limits": {"cpu": "2", "memory": "4Gi"}
}
)
# Create and deploy
deployment = Deployment(name="neurenix-model", namespace="default")
deployment.create(config)
print("Deployment created successfully")
Expose via Service
from neurenix.kubernetes import Service, ServiceConfig
service_config = ServiceConfig(
name="neurenix-model-service",
namespace="default",
selector={"app": "neurenix-model"},
ports=[{"port": 80, "targetPort": 8000, "protocol": "TCP"}],
type="LoadBalancer"
)
service = Service(name="neurenix-model-service", namespace="default")
service.create(service_config)
print("Service created successfully")
Deployments
DeploymentConfig
Comprehensive deployment configuration:
from neurenix.kubernetes import DeploymentConfig
config = DeploymentConfig(
name="ml-inference",
image="neurenix-model:v1.0",
replicas=5,
namespace="production",
labels={
"app": "ml-inference",
"version": "v1.0",
"component": "model-serving"
},
annotations={
"prometheus.io/scrape": "true",
"prometheus.io/port": "8000"
},
env={
"MODEL_PATH": "/models/model.nx",
"LOG_LEVEL": "info",
"WORKERS": "4"
},
ports=[{"containerPort": 8000, "name": "http"}],
resources={
"requests": {
"cpu": "1",
"memory": "2Gi"
},
"limits": {
"cpu": "4",
"memory": "8Gi"
}
},
liveness_probe={
"httpGet": {
"path": "/health",
"port": 8000
},
"initialDelaySeconds": 30,
"periodSeconds": 10
},
readiness_probe={
"httpGet": {
"path": "/ready",
"port": 8000
},
"initialDelaySeconds": 10,
"periodSeconds": 5
},
strategy={
"type": "RollingUpdate",
"rollingUpdate": {
"maxSurge": 1,
"maxUnavailable": 0
}
}
)
Deployment Operations
from neurenix.kubernetes import Deployment
deployment = Deployment(name="ml-inference", namespace="production")
# Create deployment
deployment.create(config)
# Check if exists
if deployment.exists():
print("Deployment is running")
# Get deployment info
info = deployment.get()
print(f"Replicas: {info['spec']['replicas']}")
# Scale deployment
deployment.scale(replicas=10)
# Restart deployment (rolling restart)
deployment.restart()
# Get deployment status
status = deployment.status()
print(f"Available replicas: {status.get('availableReplicas', 0)}")
# View logs
logs = deployment.logs(tail=100)
print(logs)
# Execute command in deployment
output = deployment.exec(["curl", "http://localhost:8000/health"])
print(output)
# Update image
deployment.update_image("neurenix-model:v2.0")
# Delete deployment
deployment.delete(wait=True)
Neurenix-Specific Deployment
Simplified deployment creation:
deployment = Deployment(name="neurenix-prod", namespace="ml-models")
deployment.create_neurenix_deployment(
image="neurenix-model:latest",
model_path="/models/classifier.nx",
replicas=3,
gpu=False,
memory="4Gi",
cpu="2",
port=8000,
env={
"BATCH_SIZE": "32",
"TIMEOUT": "30"
}
)
GPU Deployments
config = DeploymentConfig(
name="gpu-inference",
image="neurenix-model:cuda",
replicas=2,
namespace="gpu-workloads",
resources={
"requests": {
"cpu": "4",
"memory": "16Gi",
"nvidia.com/gpu": "1"
},
"limits": {
"cpu": "8",
"memory": "32Gi",
"nvidia.com/gpu": "1"
}
},
node_selector={
"accelerator": "nvidia-tesla-v100"
},
tolerations=[
{
"key": "nvidia.com/gpu",
"operator": "Exists",
"effect": "NoSchedule"
}
]
)
Pods
PodConfig
from neurenix.kubernetes import PodConfig
config = PodConfig(
name="inference-pod",
image="neurenix-model:latest",
namespace="default",
labels={"app": "inference"},
env={"MODEL_PATH": "/app/model.nx"},
ports=[{"containerPort": 8000}],
resources={
"requests": {"cpu": "1", "memory": "2Gi"},
"limits": {"cpu": "2", "memory": "4Gi"}
},
restart_policy="Always",
volumes=[
{
"name": "model-storage",
"persistentVolumeClaim": {"claimName": "model-pvc"}
}
],
volume_mounts=[
{"name": "model-storage", "mountPath": "/app/models"}
]
)
Pod Operations
from neurenix.kubernetes import Pod
pod = Pod(name="inference-pod", namespace="default")
# Create pod
pod.create(config)
# Check status
status = pod.status()
print(f"Pod status: {status}")
# Get pod info
info = pod.get()
print(f"IP: {info['status']['podIP']}")
# View logs
logs = pod.logs(tail=50)
print(logs)
# Follow logs
logs = pod.logs(follow=True)
# Execute command
output = pod.exec(["ls", "-la", "/app"])
print(output)
# Port forwarding
port_forward_process = pod.port_forward(local_port=8080, remote_port=8000)
print("Access at http://localhost:8080")
# ... use the service ...
port_forward_process.terminate()
# Copy files
pod.copy_to("./model.nx", "/app/model.nx")
pod.copy_from("/app/output.json", "./output.json")
# Delete pod
pod.delete(force=True)
Create Neurenix Pod
pod = Pod(name="neurenix-worker", namespace="ml-jobs")
pod.create_neurenix_pod(
image="neurenix-model:latest",
model_path="/models/model.nx",
gpu=True,
memory="8Gi",
cpu="4",
port=8000,
env={"DEVICE": "cuda"},
command=["python"],
args=["inference.py"]
)
Services
ServiceConfig
from neurenix.kubernetes import ServiceConfig
# ClusterIP (internal)
cluster_config = ServiceConfig(
name="internal-service",
namespace="default",
selector={"app": "neurenix-model"},
ports=[{"port": 80, "targetPort": 8000}],
type="ClusterIP"
)
# NodePort (external access via node IP)
node_config = ServiceConfig(
name="nodeport-service",
namespace="default",
selector={"app": "neurenix-model"},
ports=[{
"port": 80,
"targetPort": 8000,
"nodePort": 30080
}],
type="NodePort"
)
# LoadBalancer (cloud provider LB)
lb_config = ServiceConfig(
name="lb-service",
namespace="default",
selector={"app": "neurenix-model"},
ports=[{"port": 80, "targetPort": 8000}],
type="LoadBalancer",
external_traffic_policy="Local"
)
Service Operations
from neurenix.kubernetes import Service
service = Service(name="neurenix-service", namespace="default")
# Create service
service.create(lb_config)
# Check if exists
if service.exists():
print("Service is running")
# Get service info
info = service.get()
print(f"Type: {info['spec']['type']}")
# Get external IP (LoadBalancer)
external_ip = service.get_external_ip()
if external_ip:
print(f"Access at http://{external_ip}")
# Get cluster IP
cluster_ip = service.get_cluster_ip()
print(f"Internal IP: {cluster_ip}")
# Get node port
node_port = service.get_node_port(port=80)
if node_port:
print(f"NodePort: {node_port}")
# Get endpoints
endpoints = service.get_endpoints()
print(f"Endpoints: {endpoints}")
# Port forward
port_forward = service.port_forward(local_port=8080, remote_port=80)
print("Forwarded to localhost:8080")
# Delete service
service.delete()
Create Neurenix Service
service = Service(name="neurenix-api", namespace="production")
service.create_neurenix_service(
port=80,
target_port=8000,
type="LoadBalancer",
selector={"app": "neurenix-model", "version": "v1"},
external_traffic_policy="Local"
)
Complete Production Deployment
from neurenix.kubernetes import (
Deployment, DeploymentConfig,
Service, ServiceConfig,
ConfigMap, Secret
)
# 1. Create ConfigMap for configuration
config_map = ConfigMap(
name="model-config",
namespace="production",
data={
"model.conf": "batch_size=32\ntimeout=30",
"logging.conf": "level=info\nformat=json"
}
)
# 2. Create Secret for credentials
secret = Secret(
name="model-secrets",
namespace="production",
data={
"api-key": "base64-encoded-key",
"db-password": "base64-encoded-password"
}
)
# 3. Create Deployment
deployment_config = DeploymentConfig(
name="neurenix-production",
image="myregistry.com/neurenix-model:v2.0",
replicas=5,
namespace="production",
labels={
"app": "neurenix",
"version": "v2.0",
"tier": "api"
},
env={
"MODEL_PATH": "/models/model.nx",
"CONFIG_PATH": "/etc/config"
},
env_from=[
{"configMapRef": {"name": "model-config"}},
{"secretRef": {"name": "model-secrets"}}
],
ports=[{"containerPort": 8000, "name": "http"}],
resources={
"requests": {"cpu": "2", "memory": "4Gi"},
"limits": {"cpu": "4", "memory": "8Gi"}
},
liveness_probe={
"httpGet": {"path": "/health", "port": 8000},
"initialDelaySeconds": 30,
"periodSeconds": 10,
"timeoutSeconds": 5,
"failureThreshold": 3
},
readiness_probe={
"httpGet": {"path": "/ready", "port": 8000},
"initialDelaySeconds": 10,
"periodSeconds": 5
},
strategy={
"type": "RollingUpdate",
"rollingUpdate": {
"maxSurge": 1,
"maxUnavailable": 0
}
}
)
deployment = Deployment(name="neurenix-production", namespace="production")
deployment.create(deployment_config)
# 4. Create Service
service_config = ServiceConfig(
name="neurenix-api",
namespace="production",
selector={"app": "neurenix"},
ports=[{"port": 80, "targetPort": 8000}],
type="LoadBalancer",
annotations={
"service.beta.kubernetes.io/aws-load-balancer-type": "nlb"
}
)
service = Service(name="neurenix-api", namespace="production")
service.create(service_config)
print("Production deployment complete!")
print(f"External IP: {service.get_external_ip()}")
YAML Export
Export configurations to YAML files:
# Export deployment YAML
yaml_content = deployment_config.to_yaml()
with open("deployment.yaml", "w") as f:
f.write(yaml_content)
# Export service YAML
service_yaml = service_config.to_yaml()
with open("service.yaml", "w") as f:
f.write(service_yaml)
# Apply with kubectl
import subprocess
subprocess.run(["kubectl", "apply", "-f", "deployment.yaml"])
subprocess.run(["kubectl", "apply", "-f", "service.yaml"])
Best Practices
- Resource Limits: Always set CPU and memory limits to prevent resource exhaustion
- Health Checks: Implement liveness and readiness probes for reliability
- Rolling Updates: Use rolling updates with maxUnavailable=0 for zero-downtime deployments
- Horizontal Pod Autoscaling: Configure HPA for automatic scaling based on metrics
- Pod Disruption Budgets: Protect availability during cluster maintenance
- Namespaces: Use separate namespaces for different environments
- Labels and Selectors: Use consistent labeling for service discovery and monitoring
- Secrets Management: Use Kubernetes secrets or external secret managers
- Monitoring: Integrate with Prometheus and Grafana for observability
- Logging: Use structured logging with centralized log aggregation
Troubleshooting
Check kubectl Installation
try:
deployment = Deployment("test", "default")
except RuntimeError as e:
print(f"kubectl error: {e}")
# Install kubectl or configure kubeconfig
Debug Deployment Issues
# Check deployment status
status = deployment.status()
print(f"Desired replicas: {status.get('replicas')}")
print(f"Available replicas: {status.get('availableReplicas')}")
# View logs
logs = deployment.logs(tail=100)
print(logs)
# Describe deployment (using kubectl)
import subprocess
subprocess.run(["kubectl", "describe", "deployment", "neurenix-model"])
# Check events
subprocess.run(["kubectl", "get", "events", "--sort-by=.metadata.creationTimestamp"])