Why Monitor Production AI?

Healthcare AI can degrade over time due to data drift, changing patient populations, or subtle model regressions. Continuous monitoring helps you catch these problems early, before they affect patients.
| Risk | Example | Monitoring Solution |
| --- | --- | --- |
| Data drift | New symptom patterns post-pandemic | Distribution monitoring |
| Performance degradation | Accuracy drops over months | Metric tracking with alerts |
| Safety events | Missed red flags in production | Real-time safety monitoring |
| Edge cases | Unusual patient presentations | Anomaly detection |
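
The "Distribution monitoring" entry above is what the confidence_anomaly alert in Step 2 implements: comparing recent traffic against a baseline with a divergence measure. The sketch below shows the underlying idea with plain NumPy; it is illustrative only (the data is simulated) and is not part of the Rubric SDK.
drift_check_sketch.py
import numpy as np

def kl_divergence(p, q, eps=1e-9):
    """KL(P || Q) between two histograms; eps avoids log(0)."""
    p = np.asarray(p, dtype=float) + eps
    q = np.asarray(q, dtype=float) + eps
    p, q = p / p.sum(), q / q.sum()
    return float(np.sum(p * np.log(p / q)))

rng = np.random.default_rng(0)
bins = np.linspace(0.0, 1.0, 11)

# Stand-ins for historical vs. recent model confidence scores
baseline_scores = rng.beta(8, 2, size=5000)
recent_scores = rng.beta(5, 3, size=500)

baseline_hist, _ = np.histogram(baseline_scores, bins=bins)
recent_hist, _ = np.histogram(recent_scores, bins=bins)

drift = kl_divergence(recent_hist, baseline_hist)
print(f"KL divergence vs. baseline: {drift:.2f}")
if drift > 0.3:  # same threshold as the confidence_anomaly alert in Step 2
    print("Possible distribution shift, worth investigating")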

Step 1: Configure Production Logging

First, instrument your production system to send AI interactions to Rubric.
production_logging.py
from rubric import Rubric

# Initialize client with production settings
client = Rubric(
    api_key="rb_live_xxxxxxxx",

    # Production optimizations
    async_mode=True,          # Non-blocking logging
    batch_size=50,            # Batch logs for efficiency
    flush_interval=5.0,       # Flush every 5 seconds
    retry_on_failure=True,    # Retry failed logs
    max_retries=3
)

async def handle_triage_call(call_data):
    """Process a triage call and log to Rubric."""

    # Your AI model inference (triage_model stands in for your own model client)
    ai_result = await triage_model.predict(call_data)

    # Log to Rubric (non-blocking)
    await client.logs.create_async(
        project="voice-triage-production",

        input={
            "transcript": call_data.transcript,
            "patient_demographics": call_data.demographics,
            "audio_url": call_data.audio_url
        },

        output={
            "triage_level": ai_result.triage_level,
            "confidence": ai_result.confidence,
            "extracted_symptoms": ai_result.symptoms,
            "red_flags_detected": ai_result.red_flags,
            "disposition": ai_result.disposition
        },

        metadata={
            "model_version": "triage-v3.2.1",
            "latency_ms": ai_result.latency_ms,
            "session_id": call_data.session_id,
            "region": call_data.region
        },

        # Enable sampling for evaluation
        sample_for_evaluation=True,
        sample_rate=0.05  # Evaluate 5% of calls
    )

    return ai_result


# Ensure logs are flushed on shutdown
async def shutdown():
    await client.flush()
    await client.close()
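
How you invoke shutdown() depends on your serving stack. The sketch below is one possible wiring for a plain asyncio entrypoint using Unix signal handlers; the serving loop itself is elided and hypothetical.
app_entrypoint.py
import asyncio
import signal

async def main():
    stop = asyncio.Event()
    loop = asyncio.get_running_loop()

    # Flush logs cleanly on SIGINT/SIGTERM (Unix only)
    for sig in (signal.SIGINT, signal.SIGTERM):
        loop.add_signal_handler(sig, stop.set)

    # ... start your call-handling server here, routing calls to handle_triage_call ...

    await stop.wait()   # run until the process is asked to stop
    await shutdown()    # flush buffered Rubric logs before exiting

if __name__ == "__main__":
    asyncio.run(main())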

Step 2: Create Monitoring Rules

monitoring_setup.py
from rubric import Rubric

client = Rubric()

# Create production monitor
monitor = client.monitors.create(
    name="Voice Triage Production Monitor",
    project="voice-triage-production",

    # Sampling configuration
    sampling={
        "rate": 0.05,  # Sample 5% of traffic
        "stratify_by": ["metadata.region", "output.triage_level"],
        "minimum_per_stratum": 10  # At least 10 per category
    },

    # Evaluation configuration
    evaluation={
        "evaluators": [
            {"type": "triage_accuracy", "version": "1.2.0"},
            {"type": "red_flag_detection", "version": "1.0.0"},
            {"type": "hallucination_detection", "version": "1.0.0"}
        ],

        # Use automated evaluation for speed
        "mode": "automated",

        # Route concerning cases for human review
        "human_review_rules": [
            {
                "condition": "safety_score < 80",
                "reviewer_pool": "physician",
                "priority": "urgent"
            }
        ]
    },

    # Alert configuration
    alerts=[
        # Critical: Red flag sensitivity drops
        {
            "name": "red_flag_sensitivity_critical",
            "metric": "red_flag_sensitivity",
            "condition": "drops_below",
            "threshold": 0.95,
            "window": "30_minutes",
            "severity": "critical",
            "channels": ["pagerduty", "slack:clinical-safety"]
        },

        # High: Triage accuracy drops
        {
            "name": "triage_accuracy_degradation",
            "metric": "triage_accuracy",
            "condition": "drops_below",
            "threshold": 0.80,
            "window": "1_hour",
            "severity": "high",
            "channels": ["slack:ml-team", "email:[email protected]"]
        },

        # Medium: Under-triage rate increases
        {
            "name": "under_triage_increase",
            "metric": "under_triage_rate",
            "condition": "exceeds",
            "threshold": 0.05,
            "window": "2_hours",
            "severity": "medium",
            "channels": ["slack:ml-team"]
        },

        # Anomaly: Unusual confidence distribution
        {
            "name": "confidence_anomaly",
            "metric": "confidence_distribution",
            "condition": "distribution_shift",
            "threshold": 0.3,  # KL divergence
            "window": "4_hours",
            "severity": "medium",
            "channels": ["slack:ml-team"]
        },

        # Volume: Unexpected traffic patterns
        {
            "name": "traffic_anomaly",
            "metric": "log_volume",
            "condition": "deviates_from_baseline",
            "threshold": 0.5,  # 50% deviation
            "window": "1_hour",
            "severity": "low",
            "channels": ["slack:ml-team"]
        }
    ],

    # Dashboard configuration
    dashboard={
        "refresh_interval": 60,  # Update every minute
        "default_window": "24_hours",
        "charts": [
            "triage_accuracy_timeline",
            "red_flag_sensitivity_timeline",
            "confidence_distribution",
            "triage_level_breakdown",
            "error_rate_by_region"
        ]
    }
)

print(f"Monitor created: {monitor.id}")
print(f"Dashboard URL: {monitor.dashboard_url}")

Step 3: Configure Alert Channels

alert_channels.py
# Configure notification channels
client.channels.configure({
    "pagerduty": {
        "type": "pagerduty",
        "routing_key": "R01234567890",
        "severity_mapping": {
            "critical": "critical",
            "high": "error",
            "medium": "warning"
        }
    },

    "slack:clinical-safety": {
        "type": "slack",
        "webhook_url": "https://hooks.slack.com/services/xxx/yyy/zzz",
        "channel": "#clinical-safety-alerts",
        "mention_on_critical": ["@clinical-safety-oncall"]
    },

    "slack:ml-team": {
        "type": "slack",
        "webhook_url": "https://hooks.slack.com/services/xxx/yyy/zzz",
        "channel": "#ml-alerts"
    },

    "email:[email protected]": {
        "type": "email",
        "recipients": ["[email protected]"],
        "include_details": True
    }
})
Critical Alert Response: Critical alerts, such as a drop in red flag sensitivity, should page the on-call immediately. They indicate potential patient safety issues that require immediate investigation.
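
Alert channels are only useful if they actually deliver. One quick way to verify a Slack webhook independently of Rubric is to post a test message straight to it with the requests library, using the same placeholder URL as in the config above.
slack_webhook_smoke_test.py
import requests

# Replace with your real webhook URL before running
resp = requests.post(
    "https://hooks.slack.com/services/xxx/yyy/zzz",
    json={"text": "Test alert from the voice triage monitoring setup"},
    timeout=10,
)
resp.raise_for_status()
print("Slack webhook reachable:", resp.status_code)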

Step 4: Set Up Dashboards

# Get real-time dashboard data
dashboard = client.monitors.dashboard(
    monitor_id=monitor.id,
    window="24_hours"
)

print(f"""
Production Dashboard - Last 24 Hours
====================================

Traffic:
  Total Logs: {dashboard.traffic.total:,}
  Evaluated: {dashboard.traffic.evaluated:,}
  Pending Review: {dashboard.traffic.pending_review}

Current Metrics:
  Triage Accuracy: {dashboard.metrics.triage_accuracy:.1%}
  Red Flag Sensitivity: {dashboard.metrics.red_flag_sensitivity:.1%}
  Under-triage Rate: {dashboard.metrics.under_triage_rate:.2%}

Trends (vs yesterday):
  Triage Accuracy: {dashboard.trends.triage_accuracy:+.1%}
  Red Flag Sensitivity: {dashboard.trends.red_flag_sensitivity:+.1%}

Active Alerts: {len(dashboard.active_alerts)}
""")

for alert in dashboard.active_alerts:
    print(f"  ⚠️ {alert.name}: {alert.message}")

Step 5: Investigate Alerts

alert_investigation.py
# When an alert fires, investigate the cause

def investigate_alert(alert_id):
    """Investigate a triggered alert."""

    alert = client.alerts.get(alert_id)

    print(f"""
    Alert Investigation: {alert.name}
    ==================================
    Triggered: {alert.triggered_at}
    Severity: {alert.severity}
    Current Value: {alert.current_value:.2%}
    Threshold: {alert.threshold:.2%}
    Window: {alert.window}
    """)

    # Get contributing factors
    analysis = client.alerts.analyze(alert_id)

    print("Contributing Factors:")
    for factor in analysis.factors:
        print(f"  - {factor.description}: {factor.contribution:.1%}")

    # Get affected samples
    affected_samples = client.alerts.affected_samples(
        alert_id,
        limit=20
    )

    print(f"\nAffected Samples ({len(affected_samples)} shown):")
    for sample in affected_samples:
        print(f"""
    Sample: {sample.id}
      Triage: {sample.output.triage_level}
      Issue: {sample.primary_issue}
      Confidence: {sample.output.confidence:.1%}
        """)

    # Check for patterns
    patterns = client.alerts.pattern_analysis(alert_id)

    print("\nDetected Patterns:")
    for pattern in patterns:
        print(f"""
    Pattern: {pattern.description}
      Frequency: {pattern.count} cases
      Possible Cause: {pattern.likely_cause}
      Suggested Action: {pattern.suggested_action}
        """)

    return {
        "alert": alert,
        "analysis": analysis,
        "patterns": patterns
    }


# Example: Investigate red flag sensitivity drop
investigate_alert("alert_abc123")

Step 6: Automated Remediation

Set up automated responses to certain alert conditions.
auto_remediation.py
# Configure automated responses
client.monitors.configure_remediation(
    monitor_id=monitor.id,

    remediations=[
        # Increase human review on degradation
        {
            "trigger": "triage_accuracy_degradation",
            "action": "increase_review_rate",
            "config": {
                "new_rate": 0.20,  # Review 20% instead of 5%
                "duration": "2_hours"
            }
        },

        # Automatic rollback on critical safety issues
        {
            "trigger": "red_flag_sensitivity_critical",
            "action": "traffic_shift",
            "config": {
                "shift_to": "model:triage-v3.1.0",  # Previous stable version
                "percentage": 50,  # Route 50% to fallback
                "duration": "until_resolved"
            }
        },

        # Notify on-call for manual intervention
        {
            "trigger": "under_triage_increase",
            "action": "escalate",
            "config": {
                "escalation_path": "ml-oncall → clinical-safety → engineering-manager",
                "escalate_after": "30_minutes"
            }
        }
    ]
)
Human in the Loop: For healthcare AI, automated remediation should increase human oversight (higher review rates, traffic shifts to a known-stable model) rather than make fully autonomous clinical decisions.

Step 7: Regular Reporting

scheduled_reports.py
# Configure scheduled reports
client.reports.schedule(
    name="Weekly Triage Safety Report",
    monitor_id=monitor.id,

    schedule="weekly",  # daily, weekly, monthly
    day_of_week="monday",
    time="09:00",
    timezone="America/New_York",

    recipients=[
        "[email protected]",
        "[email protected]"
    ],

    include=[
        "executive_summary",
        "metric_trends",
        "alert_summary",
        "human_review_outcomes",
        "top_failure_patterns",
        "recommendations"
    ],

    format="pdf"
)

# Generate ad-hoc report
report = client.reports.generate(
    monitor_id=monitor.id,
    window="7_days",
    include=[
        "executive_summary",
        "detailed_metrics",
        "sample_analysis"
    ]
)

print(f"Report generated: {report.download_url}")

Monitoring Best Practices

| Practice | Details |
| --- | --- |
| Set baseline first | Run 1-2 weeks of monitoring to establish normal ranges before setting alerts |
| Start with high thresholds | Avoid alert fatigue by starting conservative, then tightening |
| Stratify by segment | Monitor key segments (regions, patient types) separately |
| Review false positives | Regularly tune alerts to reduce noise |
| Document responses | Create runbooks for common alert scenarios |
| Test alert paths | Regularly verify alerts reach the right people |
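
"Set baseline first" and "start with high thresholds" can be made concrete by deriving the initial alert floor from the spread observed during the baseline period. The numbers below are made up; the point is the calculation, not the values.
baseline_threshold_sketch.py
import numpy as np

# Daily triage accuracy over a two-week baseline period (hypothetical values)
baseline_accuracy = np.array([0.86, 0.87, 0.85, 0.88, 0.86, 0.87, 0.84,
                              0.86, 0.88, 0.87, 0.85, 0.86, 0.87, 0.86])

mean, std = baseline_accuracy.mean(), baseline_accuracy.std()

# Start conservative: alert only on a 3-sigma drop, then tighten as you gain confidence
initial_floor = round(mean - 3 * std, 3)
print(f"Baseline {mean:.3f} +/- {std:.3f} -> initial alert floor {initial_floor}")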

Integration with Observability Stack

# Export metrics to your observability platform
client.monitors.configure_export(
    monitor_id=monitor.id,

    exports=[
        {
            "type": "prometheus",
            "endpoint": "/metrics",
            "metrics": [
                "rubric_triage_accuracy",
                "rubric_red_flag_sensitivity",
                "rubric_under_triage_rate",
                "rubric_evaluation_latency"
            ]
        },
        {
            "type": "datadog",
            "api_key": "dd_api_key",
            "tags": ["service:triage-ai", "env:production"]
        },
        {
            "type": "cloudwatch",
            "region": "us-west-2",
            "namespace": "Rubric/TriageAI"
        }
    ]
)
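
Once the Prometheus export is enabled, it is worth smoke-testing that the metrics endpoint is actually serving the expected series. The sketch below assumes the exporter is reachable on a hypothetical localhost port; adjust the URL to wherever your /metrics endpoint is served.
metrics_smoke_test.py
import requests

resp = requests.get("http://localhost:8000/metrics", timeout=5)
resp.raise_for_status()

expected = [
    "rubric_triage_accuracy",
    "rubric_red_flag_sensitivity",
    "rubric_under_triage_rate",
]
missing = [metric for metric in expected if metric not in resp.text]
if missing:
    raise RuntimeError(f"Missing exported metrics: {missing}")

print("Prometheus export serving all expected metrics")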