Why Monitor Production AI?
Healthcare AI can degrade over time due to data drift, changing patient populations, or subtle model issues. Continuous monitoring ensures you catch problems early.

| Risk | Example | Monitoring Solution |
|---|---|---|
| Data drift | New symptom patterns post-pandemic | Distribution monitoring |
| Performance degradation | Accuracy drops over months | Metric tracking with alerts |
| Safety events | Missed red flags in production | Real-time safety monitoring |
| Edge cases | Unusual patient presentations | Anomaly detection |
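
For the data-drift row above, distribution monitoring can be illustrated with a small standalone check. The sketch below is not part of the Rubric SDK; it uses scipy and illustrative counts to compare the distribution of a key field (here, triage level) in recent traffic against a baseline, flagging drift when the KL divergence exceeds a threshold (0.3, mirroring the confidence-anomaly alert configured in Step 2).

drift_check.py

# Illustrative only — not part of the Rubric SDK
import numpy as np
from scipy.stats import entropy

def kl_divergence(baseline_counts, current_counts, eps=1e-9):
    """KL divergence between two categorical distributions (e.g., triage levels)."""
    p = np.asarray(baseline_counts, dtype=float)
    q = np.asarray(current_counts, dtype=float)
    p, q = p / p.sum(), q / q.sum()
    # Smooth to avoid division by zero for categories unseen in one window
    return entropy(p + eps, q + eps)

# Example: counts of triage levels [emergency, urgent, routine, self-care]
baseline = [120, 480, 900, 300]   # from an established baseline period
current  = [210, 510, 820, 260]   # last few hours of production traffic

drift = kl_divergence(baseline, current)
print(f"KL divergence: {drift:.3f}")
if drift > 0.3:  # same threshold used for the confidence_anomaly alert in Step 2
    print("Possible data drift — investigate before trusting downstream metrics")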
Step 1: Configure Production Logging
First, instrument your production system to send AI interactions to Rubric.
production_logging.py
from rubric import Rubric
import asyncio

# Initialize client with production settings
client = Rubric(
    api_key="rb_live_xxxxxxxx",
    # Production optimizations
    async_mode=True,        # Non-blocking logging
    batch_size=50,          # Batch logs for efficiency
    flush_interval=5.0,     # Flush every 5 seconds
    retry_on_failure=True,  # Retry failed logs
    max_retries=3
)

async def handle_triage_call(call_data):
    """Process a triage call and log it to Rubric."""
    # Your AI model inference
    ai_result = await triage_model.predict(call_data)

    # Log to Rubric (non-blocking)
    await client.logs.create_async(
        project="voice-triage-production",
        input={
            "transcript": call_data.transcript,
            "patient_demographics": call_data.demographics,
            "audio_url": call_data.audio_url
        },
        output={
            "triage_level": ai_result.triage_level,
            "confidence": ai_result.confidence,
            "extracted_symptoms": ai_result.symptoms,
            "red_flags_detected": ai_result.red_flags,
            "disposition": ai_result.disposition
        },
        metadata={
            "model_version": "triage-v3.2.1",
            "latency_ms": ai_result.latency_ms,
            "session_id": call_data.session_id,
            "region": call_data.region
        },
        # Enable sampling for evaluation
        sample_for_evaluation=True,
        sample_rate=0.05  # Evaluate 5% of calls
    )
    return ai_result

# Ensure logs are flushed on shutdown
async def shutdown():
    await client.flush()
    await client.close()
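
How you call `handle_triage_call` and `shutdown` depends on your service framework; web frameworks usually expose startup/shutdown hooks for this. The sketch below is a hypothetical standalone entry point (Unix-only signal handling) showing the key point: flush the Rubric client before the process exits.

app_lifecycle.py

# A minimal sketch — in practice, wire shutdown() into your framework's
# shutdown hook (e.g., FastAPI lifespan events) instead of raw signals.
import asyncio
import signal

async def main():
    loop = asyncio.get_running_loop()
    stop = asyncio.Event()

    # Trigger a graceful stop on SIGINT/SIGTERM (Unix only)
    for sig in (signal.SIGINT, signal.SIGTERM):
        loop.add_signal_handler(sig, stop.set)

    # ... serve traffic here, calling handle_triage_call(call_data) per call ...
    await stop.wait()
    await shutdown()  # flushes buffered logs and closes the Rubric client above

if __name__ == "__main__":
    asyncio.run(main())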
Step 2: Create Monitoring Rules
monitoring_setup.py
from rubric import Rubric

client = Rubric()

# Create production monitor
monitor = client.monitors.create(
    name="Voice Triage Production Monitor",
    project="voice-triage-production",

    # Sampling configuration
    sampling={
        "rate": 0.05,  # Sample 5% of traffic
        "stratify_by": ["metadata.region", "output.triage_level"],
        "minimum_per_stratum": 10  # At least 10 per category
    },

    # Evaluation configuration
    evaluation={
        "evaluators": [
            {"type": "triage_accuracy", "version": "1.2.0"},
            {"type": "red_flag_detection", "version": "1.0.0"},
            {"type": "hallucination_detection", "version": "1.0.0"}
        ],
        # Use automated evaluation for speed
        "mode": "automated",
        # Route concerning cases for human review
        "human_review_rules": [
            {
                "condition": "safety_score < 80",
                "reviewer_pool": "physician",
                "priority": "urgent"
            }
        ]
    },

    # Alert configuration
    alerts=[
        # Critical: Red flag sensitivity drops
        {
            "name": "red_flag_sensitivity_critical",
            "metric": "red_flag_sensitivity",
            "condition": "drops_below",
            "threshold": 0.95,
            "window": "30_minutes",
            "severity": "critical",
            "channels": ["pagerduty", "slack:clinical-safety"]
        },
        # High: Triage accuracy drops
        {
            "name": "triage_accuracy_degradation",
            "metric": "triage_accuracy",
            "condition": "drops_below",
            "threshold": 0.80,
            "window": "1_hour",
            "severity": "high",
            "channels": ["slack:ml-team", "email:[email protected]"]
        },
        # Medium: Under-triage rate increases
        {
            "name": "under_triage_increase",
            "metric": "under_triage_rate",
            "condition": "exceeds",
            "threshold": 0.05,
            "window": "2_hours",
            "severity": "medium",
            "channels": ["slack:ml-team"]
        },
        # Anomaly: Unusual confidence distribution
        {
            "name": "confidence_anomaly",
            "metric": "confidence_distribution",
            "condition": "distribution_shift",
            "threshold": 0.3,  # KL divergence
            "window": "4_hours",
            "severity": "medium",
            "channels": ["slack:ml-team"]
        },
        # Volume: Unexpected traffic patterns
        {
            "name": "traffic_anomaly",
            "metric": "log_volume",
            "condition": "deviates_from_baseline",
            "threshold": 0.5,  # 50% deviation
            "window": "1_hour",
            "severity": "low",
            "channels": ["slack:ml-team"]
        }
    ],

    # Dashboard configuration
    dashboard={
        "refresh_interval": 60,  # Update every minute
        "default_window": "24_hours",
        "charts": [
            "triage_accuracy_timeline",
            "red_flag_sensitivity_timeline",
            "confidence_distribution",
            "triage_level_breakdown",
            "error_rate_by_region"
        ]
    }
)
print(f"Monitor created: {monitor.id}")
print(f"Dashboard URL: {monitor.dashboard_url}")
Step 3: Configure Alert Channels
alert_channels.py
# Configure notification channels
client.channels.configure({
    "pagerduty": {
        "type": "pagerduty",
        "routing_key": "R01234567890",
        "severity_mapping": {
            "critical": "critical",
            "high": "error",
            "medium": "warning"
        }
    },
    "slack:clinical-safety": {
        "type": "slack",
        "webhook_url": "https://hooks.slack.com/services/xxx/yyy/zzz",
        "channel": "#clinical-safety-alerts",
        "mention_on_critical": ["@clinical-safety-oncall"]
    },
    "slack:ml-team": {
        "type": "slack",
        "webhook_url": "https://hooks.slack.com/services/xxx/yyy/zzz",
        "channel": "#ml-alerts"
    },
    "email:[email protected]": {
        "type": "email",
        "recipients": ["[email protected]"],
        "include_details": True
    }
})
Critical Alert Response: Critical alerts, such as a drop in red-flag sensitivity, should page the on-call immediately. They indicate potential patient safety issues that require immediate investigation.
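
Before relying on these channels for patient-safety paging, verify they actually deliver (see also "Test alert paths" in the best practices below). The sketch posts a test message directly to the Slack incoming webhook configured above; the webhook URL is the placeholder from that config.

channel_smoke_test.py

# Send a test message straight to the Slack webhook to confirm delivery
import requests

SLACK_WEBHOOK = "https://hooks.slack.com/services/xxx/yyy/zzz"  # placeholder from the config above

resp = requests.post(
    SLACK_WEBHOOK,
    json={"text": ":rotating_light: Rubric alert channel test — please ignore"},
    timeout=10,
)
resp.raise_for_status()
print("Slack webhook accepted the test message")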
Step 4: Set Up Dashboards
# Get real-time dashboard data
dashboard = client.monitors.dashboard(
    monitor_id=monitor.id,
    window="24_hours"
)

print(f"""
Production Dashboard - Last 24 Hours
====================================
Traffic:
  Total Logs: {dashboard.traffic.total:,}
  Evaluated: {dashboard.traffic.evaluated:,}
  Pending Review: {dashboard.traffic.pending_review}

Current Metrics:
  Triage Accuracy: {dashboard.metrics.triage_accuracy:.1%}
  Red Flag Sensitivity: {dashboard.metrics.red_flag_sensitivity:.1%}
  Under-triage Rate: {dashboard.metrics.under_triage_rate:.2%}

Trends (vs yesterday):
  Triage Accuracy: {dashboard.trends.triage_accuracy:+.1%}
  Red Flag Sensitivity: {dashboard.trends.red_flag_sensitivity:+.1%}

Active Alerts: {len(dashboard.active_alerts)}
""")

for alert in dashboard.active_alerts:
    print(f"  ⚠️ {alert.name}: {alert.message}")
Step 5: Investigate Alerts
alert_investigation.py
# When an alert fires, investigate the cause
def investigate_alert(alert_id):
    """Investigate a triggered alert."""
    alert = client.alerts.get(alert_id)

    print(f"""
Alert Investigation: {alert.name}
==================================
Triggered: {alert.triggered_at}
Severity: {alert.severity}
Current Value: {alert.current_value:.2%}
Threshold: {alert.threshold:.2%}
Window: {alert.window}
""")

    # Get contributing factors
    analysis = client.alerts.analyze(alert_id)
    print("Contributing Factors:")
    for factor in analysis.factors:
        print(f"  - {factor.description}: {factor.contribution:.1%}")

    # Get affected samples
    affected_samples = client.alerts.affected_samples(
        alert_id,
        limit=20
    )
    print(f"\nAffected Samples ({len(affected_samples)} shown):")
    for sample in affected_samples:
        print(f"""
  Sample: {sample.id}
  Triage: {sample.output.triage_level}
  Issue: {sample.primary_issue}
  Confidence: {sample.output.confidence:.1%}
""")

    # Check for patterns
    patterns = client.alerts.pattern_analysis(alert_id)
    print("\nDetected Patterns:")
    for pattern in patterns:
        print(f"""
  Pattern: {pattern.description}
  Frequency: {pattern.count} cases
  Possible Cause: {pattern.likely_cause}
  Suggested Action: {pattern.suggested_action}
""")

    return {
        "alert": alert,
        "analysis": analysis,
        "patterns": patterns
    }
# Example: Investigate red flag sensitivity drop
investigate_alert("alert_abc123")
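
A quick way to make sense of the affected samples returned above is to aggregate them by triage level and primary issue, which often reveals whether one segment dominates the alert. This sketch uses only fields already shown in the investigation output.

sample_summary.py

# Aggregate affected samples to spot dominant triage levels or issues
from collections import Counter

def summarize_affected(samples):
    by_level = Counter(s.output.triage_level for s in samples)
    by_issue = Counter(s.primary_issue for s in samples)
    print("By triage level:", dict(by_level))
    print("Top issues:", by_issue.most_common(5))

# summarize_affected(client.alerts.affected_samples("alert_abc123", limit=100))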
Step 6: Automated Remediation
Set up automated responses to certain alert conditions.
auto_remediation.py
# Configure automated responses
client.monitors.configure_remediation(
    monitor_id=monitor.id,
    remediations=[
        # Increase human review on degradation
        {
            "trigger": "triage_accuracy_degradation",
            "action": "increase_review_rate",
            "config": {
                "new_rate": 0.20,  # Review 20% instead of 5%
                "duration": "2_hours"
            }
        },
        # Automatic rollback on critical safety issues
        {
            "trigger": "red_flag_sensitivity_critical",
            "action": "traffic_shift",
            "config": {
                "shift_to": "model:triage-v3.1.0",  # Previous stable version
                "percentage": 50,  # Route 50% to fallback
                "duration": "until_resolved"
            }
        },
        # Notify on-call for manual intervention
        {
            "trigger": "under_triage_increase",
            "action": "escalate",
            "config": {
                "escalation_path": "ml-oncall → clinical-safety → engineering-manager",
                "escalate_after": "30_minutes"
            }
        }
    ]
)
Human in the Loop: For healthcare AI, automated remediation should focus on increasing human oversight (higher review rates) or shifting traffic to a known-stable model, rather than making fully autonomous clinical decisions.
Step 7: Regular Reporting
scheduled_reports.py
# Configure scheduled reports
client.reports.schedule(
    name="Weekly Triage Safety Report",
    monitor_id=monitor.id,
    schedule="weekly",  # daily, weekly, monthly
    day_of_week="monday",
    time="09:00",
    timezone="America/New_York",
    recipients=[
        "[email protected]",
        "[email protected]"
    ],
    include=[
        "executive_summary",
        "metric_trends",
        "alert_summary",
        "human_review_outcomes",
        "top_failure_patterns",
        "recommendations"
    ],
    format="pdf"
)

# Generate ad-hoc report
report = client.reports.generate(
    monitor_id=monitor.id,
    window="7_days",
    include=[
        "executive_summary",
        "detailed_metrics",
        "sample_analysis"
    ]
)
print(f"Report generated: {report.download_url}")
Monitoring Best Practices
| Practice | Details |
|---|---|
| Set baseline first | Run 1-2 weeks of monitoring to establish normal ranges before setting alerts |
| Start with high thresholds | Avoid alert fatigue by starting conservative, then tightening |
| Stratify by segment | Monitor key segments (regions, patient types) separately |
| Review false positives | Regularly tune alerts to reduce noise |
| Document responses | Create runbooks for common alert scenarios |
| Test alert paths | Regularly verify alerts reach the right people |
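
For "Set baseline first" and "Start with high thresholds", one simple approach is to derive an initial alert threshold from the baseline period itself, for example the mean minus three standard deviations of the daily metric. The values below are illustrative, not real triage data.

baseline_thresholds.py

# Derive an initial alert threshold from ~2 weeks of baseline metric values
import statistics

daily_accuracy = [0.86, 0.88, 0.87, 0.85, 0.89, 0.88, 0.86,
                  0.87, 0.88, 0.86, 0.87, 0.89, 0.88, 0.87]  # illustrative

mean = statistics.mean(daily_accuracy)
stdev = statistics.stdev(daily_accuracy)

# Start conservative (3 sigma below the mean) to avoid alert fatigue,
# then tighten once false-positive rates are understood
threshold = round(mean - 3 * stdev, 3)
print(f"baseline mean={mean:.3f}, stdev={stdev:.3f}, suggested threshold={threshold}")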
Integration with Observability Stack
# Export metrics to your observability platform
client.monitors.configure_export(
    monitor_id=monitor.id,
    exports=[
        {
            "type": "prometheus",
            "endpoint": "/metrics",
            "metrics": [
                "rubric_triage_accuracy",
                "rubric_red_flag_sensitivity",
                "rubric_under_triage_rate",
                "rubric_evaluation_latency"
            ]
        },
        {
            "type": "datadog",
            "api_key": "dd_api_key",
            "tags": ["service:triage-ai", "env:production"]
        },
        {
            "type": "cloudwatch",
            "region": "us-west-2",
            "namespace": "Rubric/TriageAI"
        }
    ]
)
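
To sanity-check the Prometheus export, you can fetch the scrape endpoint and confirm the rubric_* series appear. The host below is a placeholder; point it at wherever the /metrics endpoint configured above is actually served.

verify_prometheus_export.py

# Fetch the scrape endpoint and check that the expected series are exposed
import requests

METRICS_URL = "http://localhost:9090/metrics"  # placeholder host and port

body = requests.get(METRICS_URL, timeout=10).text
expected = [
    "rubric_triage_accuracy",
    "rubric_red_flag_sensitivity",
    "rubric_under_triage_rate",
    "rubric_evaluation_latency",
]
missing = [name for name in expected if name not in body]
print("missing series:", missing or "none")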
