30 Days of MLOps Challenge · Day 10
Serving ML Models with FastAPI & Flask
Turn trained models into robust HTTP APIs with FastAPI or Flask. Handle validation, errors, logging, and CORS for production‑ready inference services.
💡 Hey — It's Aviraj Kawade 👋
Key Learnings
- Expose ML models as REST APIs using Flask or FastAPI.
- Understand differences between Flask and FastAPI for serving.
- Handle JSON I/O, validation, and error responses.
- Set up logging and CORS for production‑ready APIs.

Convert Your Model into a REST API
We will use a simple scikit‑learn RandomForest on the Iris dataset, saved as a pickle file, then serve it via Flask or FastAPI.
Save a model
# save_model.py
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
import pickle
iris = load_iris()
X, y = iris.data, iris.target
model = RandomForestClassifier()
model.fit(X, y)
with open("iris_model.pkl", "wb") as f:
pickle.dump(model, f)
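To confirm the pickle round-trips correctly, you can load it back and run a quick prediction (optional sanity check; the class printed depends on the trained model, though this classic setosa sample is normally class 0):
# check_model.py
import pickle

with open("iris_model.pkl", "rb") as f:
    model = pickle.load(f)

# One setosa-like row of the four iris measurements
print(model.predict([[5.1, 3.5, 1.4, 0.2]]))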
Option 1: Flask
Install:
pip install flask scikit-learn
App:
# app_flask.py
from flask import Flask, request, jsonify
import pickle
import numpy as np
app = Flask(__name__)
model = pickle.load(open("iris_model.pkl", "rb"))
@app.route("/predict", methods=["POST"])
def predict():
    data = request.get_json()
    features = np.array(data["features"]).reshape(1, -1)
    prediction = model.predict(features)
    return jsonify({"prediction": int(prediction[0])})

@app.route("/health", methods=["GET"])
def health():
    return jsonify({"status": "ok"})

if __name__ == "__main__":
    app.run(debug=True)
Test:
curl -X POST http://127.0.0.1:5000/predict \
-H "Content-Type: application/json" \
-d '{"features": [5.1, 3.5, 1.4, 0.2]}'
Option 2: FastAPI
Install:
pip install fastapi uvicorn scikit-learn
App:
# app_fastapi.py
from fastapi import FastAPI
from pydantic import BaseModel
import pickle
import numpy as np
class Features(BaseModel):
    features: list[float]
app = FastAPI()
model = pickle.load(open("iris_model.pkl", "rb"))
@app.post("/predict")
def predict(data: Features):
    features = np.array(data.features).reshape(1, -1)
    prediction = model.predict(features)
    return {"prediction": int(prediction[0])}

@app.get("/health")
def health():
    return {"status": "ok"}
Run:
uvicorn app_fastapi:app --reload
Test:
curl -X POST http://127.0.0.1:8000/predict \
-H "Content-Type: application/json" \
-d '{"features": [5.1, 3.5, 1.4, 0.2]}'
Validation & Error Handling (FastAPI)
pip install fastapi uvicorn scikit-learn pydantic
# app_fastapi.py
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, conlist
import pickle, numpy as np
app = FastAPI()
model = pickle.load(open("iris_model.pkl", "rb"))
class Features(BaseModel):
    # Pydantic v2 syntax; with Pydantic v1 use conlist(float, min_items=4, max_items=4)
    features: conlist(float, min_length=4, max_length=4)

@app.post("/predict")
def predict(data: Features):
    try:
        features = np.array(data.features).reshape(1, -1)
        prediction = model.predict(features)
        return {"prediction": int(prediction[0])}
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))

@app.get("/health")
def health():
    return {"status": "ok"}
Validation is automatic here: sending too few (or too many) items returns a 422 response with details, without any extra code in the route.
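For example, posting only three values triggers the constraint (response body shown as a rough sketch; the exact message depends on the Pydantic version):
curl -X POST http://127.0.0.1:8000/predict \
  -H "Content-Type: application/json" \
  -d '{"features": [5.1, 3.5, 1.4]}'
# HTTP 422 with a body roughly like:
# {"detail": [{"loc": ["body", "features"], "msg": "List should have at least 4 items ...", ...}]}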
Validation & Error Handling (Flask)
pip install flask scikit-learn
# app_flask.py
from flask import Flask, request, jsonify
import pickle, numpy as np
app = Flask(__name__)
model = pickle.load(open("iris_model.pkl", "rb"))
@app.route("/predict", methods=["POST"])
def predict():
    try:
        data = request.get_json()
        features = data.get("features", [])
        if not isinstance(features, list) or len(features) != 4:
            return jsonify({"error": "features must be a list of 4 numbers"}), 400
        features = np.array(features).reshape(1, -1)
        prediction = model.predict(features)
        return jsonify({"prediction": int(prediction[0])})
    except Exception as e:
        return jsonify({"error": str(e)}), 500

@app.route("/health", methods=["GET"])
def health():
    return jsonify({"status": "ok"})

if __name__ == "__main__":
    app.run(debug=True)
Flask vs FastAPI (Quick View)
| Feature | Flask | FastAPI |
| --- | --- | --- |
| Server interface | WSGI | ASGI |
| Performance | Slower under high concurrency | Generally faster |
| Validation | Manual or third-party libs | Pydantic built-in |
| Docs | Manual | Automatic (Swagger UI / ReDoc) |
| Async | Limited | First-class |
Production: Logging & CORS
FastAPI
pip install fastapi uvicorn scikit-learn
# app_fastapi.py
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, conlist
import logging, pickle, numpy as np
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("ml-api")
model = pickle.load(open("iris_model.pkl", "rb"))
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # restrict to known origins in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

class Features(BaseModel):
    # Pydantic v2 syntax; with Pydantic v1 use conlist(float, min_items=4, max_items=4)
    features: conlist(float, min_length=4, max_length=4)

@app.middleware("http")
async def log_requests(request: Request, call_next):
    logger.info(f"Incoming {request.method} {request.url}")
    response = await call_next(request)
    logger.info(f"Completed with {response.status_code}")
    return response

@app.post("/predict")
def predict(data: Features):
    features = np.array(data.features).reshape(1, -1)
    prediction = model.predict(features)
    logger.info(f"Prediction: {prediction[0]}")
    return {"prediction": int(prediction[0])}

@app.get("/health")
def health():
    return {"status": "ok"}
Flask
pip install flask flask-cors scikit-learn
# app_flask.py
from flask import Flask, request, jsonify
from flask_cors import CORS
import logging, pickle, numpy as np
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("ml-api")
app = Flask(__name__)
CORS(app) # restrict origins in prod
model = pickle.load(open("iris_model.pkl", "rb"))
@app.before_request
def log_request_info():
    logger.info(f"Incoming {request.method} {request.path}")

@app.route("/predict", methods=["POST"])
def predict():
    try:
        data = request.get_json()
        features = data.get("features", [])
        if not isinstance(features, list) or len(features) != 4:
            return jsonify({"error": "features must be a list of 4 numbers"}), 400
        features = np.array(features).reshape(1, -1)
        prediction = model.predict(features)
        logger.info(f"Prediction: {prediction[0]}")
        return jsonify({"prediction": int(prediction[0])})
    except Exception as e:
        logger.error(f"Error: {str(e)}")
        return jsonify({"error": str(e)}), 500

@app.route("/health", methods=["GET"])
def health():
    return jsonify({"status": "ok"})
Challenges
- Serve a scikit‑learn or TensorFlow model using Flask or FastAPI.
- Create a /predict POST route that accepts a list of input features.
- Add input validation and error handling.
- Add support for batch predictions (multiple rows); see the sketch after this list.
- Add logging of requests and predictions to a file; the same sketch shows one approach.
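A minimal FastAPI sketch for the last two challenges, assuming the same iris_model.pkl; the module name app_batch.py, the rows field, and the predictions.log file are illustrative choices, not part of the exercises above:
# app_batch.py (hypothetical) - batch /predict plus logging to a file
import logging
import pickle

import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# Log to predictions.log as well as stdout
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[logging.FileHandler("predictions.log"), logging.StreamHandler()],
)
logger = logging.getLogger("ml-api")

with open("iris_model.pkl", "rb") as f:
    model = pickle.load(f)

app = FastAPI()

class BatchFeatures(BaseModel):
    # Each inner list is one row of 4 iris measurements
    rows: list[list[float]]

@app.post("/predict")
def predict(data: BatchFeatures):
    if not data.rows or any(len(row) != 4 for row in data.rows):
        raise HTTPException(status_code=400, detail="each row must contain exactly 4 numbers")
    X = np.array(data.rows)
    preds = model.predict(X)
    logger.info(f"Batch of {len(preds)} predictions: {preds.tolist()}")
    return {"predictions": [int(p) for p in preds]}
Treat it as a starting point rather than a finished solution: the same idea (accept a list of rows, validate shape, predict on the whole array at once) carries over directly to the Flask version.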