from neurenix import load_model, save_model
from neurenix.quantization import quantize_model, prune_model, QuantizationType
from neurenix.onnx_support import to_onnx
import numpy as np
# Edge-deployment pipeline: load a trained model, prune and quantize it,
# evaluate and benchmark the result, then save it and export it to ONNX.
#
# NOTE(review): `evaluate`, `test_dataset`, `benchmark_model` and
# `get_model_size` are not defined or imported in the visible part of this
# file; they must be brought into scope before step 4 onwards can run.

# Values used by more than one step. Keeping each in a single place stops the
# benchmarked shape, the exported shape and the reported ONNX path from
# drifting apart.
TRAINED_MODEL_PATH = "trained_model.nx"
EDGE_MODEL_PATH = "edge_model.nx"
ONNX_MODEL_PATH = "edge_model.onnx"
# Presumably (batch, channels, height, width) -- TODO confirm against the model.
INPUT_SHAPE = (1, 3, 224, 224)
ONNX_OPSET_VERSION = 13

# 1. Load the trained model.
print("Loading model...")
model = load_model(TRAINED_MODEL_PATH)

# 2. Prune the model (magnitude method, pruning rate 0.3).
print("Pruning model...")
pruned_model = prune_model(model, pruning_rate=0.3, method="magnitude")

# 3. Quantize the pruned model to INT8.
print("Quantizing model...")
optimized_model = quantize_model(pruned_model, dtype=QuantizationType.INT8)

# 4. Evaluate the optimized model on the test dataset.
print("Evaluating...")
test_accuracy = evaluate(optimized_model, test_dataset)
print(f"Optimized model accuracy: {test_accuracy:.4f}")

# 5. Benchmark inference; the result is read via the "mean_ms" and
#    "throughput_fps" keys.
print("Benchmarking...")
stats = benchmark_model(optimized_model, INPUT_SHAPE)
print(f"Inference time: {stats['mean_ms']:.2f} ms")
print(f"Throughput: {stats['throughput_fps']:.2f} FPS")

# 6. Save the optimized model.
print("Saving optimized model...")
save_model(optimized_model, EDGE_MODEL_PATH)

# 7. Export to ONNX, using the same input shape that was benchmarked.
print("Exporting to ONNX...")
to_onnx(
    model=optimized_model,
    input_shape=INPUT_SHAPE,
    output_path=ONNX_MODEL_PATH,
    opset_version=ONNX_OPSET_VERSION,
)

print("Edge deployment package ready!")
print(f"Model size: {get_model_size(optimized_model):.2f} MB")
# Report the path that was actually passed to to_onnx above.
print(f"ONNX model: {ONNX_MODEL_PATH}")