Primitives.org.ai

Guardrails

Safety boundaries and operational limits

Agent Guardrails

Guardrails define the boundaries within which agents operate, ensuring safe, predictable, and compliant behavior.

Guardrails Configuration

// The factory is aliased on import: an imported binding cannot be redeclared
// in the same module, so `const agent = agent(...)` would be a SyntaxError.
import { agent as createAgent } from 'autonomous-agents'

// Build an agent with its guardrails supplied inline.
const agent = createAgent({
  name: 'safe-agent',

  guardrails: {
    // Rate limits
    maxActionsPerHour: 100,
    maxActionsPerDay: 1000,

    // Content policies
    neverMention: ['competitor-names', 'internal-financials'],
    alwaysInclude: ['disclaimer-for-legal-advice'],

    // Approval requirements: each entry pairs an action with the condition
    // under which approval is needed.
    // NOTE(review): the `when` strings look like a condition DSL interpreted
    // by the library — confirm the accepted syntax (e.g. `=` vs `==`).
    requireApprovalFor: [
      { action: 'send-email', when: 'external-recipient' },
      { action: 'create-ticket', when: 'priority = critical' },
      { action: 'modify-data', when: 'production-environment' },
    ],
  },
})

Rate Limiting

Action Limits

// Rate-limit guardrails: global caps, per-action caps, and per-recipient caps.
const guardrails = {
  limits: {
    // Caps that apply regardless of action type
    maxActionsPerMinute: 10,
    maxActionsPerHour: 100,
    maxActionsPerDay: 1000,

    // Caps keyed by action name
    actions: {
      'send-email': {
        perHour: 20,
        perDay: 100,
      },
      'create-ticket': {
        perHour: 50,
        perDay: 500,
      },
      'api-call': {
        perMinute: 60,
      },
    },

    // Caps keyed by recipient channel
    perRecipient: {
      email: {
        perDay: 5,
      },
      slack: {
        perHour: 10,
      },
    },
  },
}

// Check before action: ask the agent whether 'send-email' may proceed.
// NOTE(review): presumably checkLimit resolves to a truthy value when the
// action is still within its limits — confirm against the agent API.
const canProceed = await agent.checkLimit('send-email')
if (!canProceed) {
  // Falsy result: hand off to the agent's wait-or-escalate handling
  await agent.waitOrEscalate()
}

Spending Limits

// Spending guardrails: monetary caps and resource-usage caps.
// The currency/unit of the amounts is not specified in this snippet.
const guardrails = {
  spending: {
    // Money-related caps
    maxRefundAmount: 100,
    maxRefundsPerDay: 500,
    maxPurchaseAmount: 50,

    // Resource-related caps, broken down by period
    maxApiCosts: {
      perDay: 100,
      perMonth: 2000,
    },
    maxComputeMinutes: {
      perDay: 60,
    },
  },
}

Content Policies

Prohibited Content

// Content guardrails: banned subjects, topics to steer away from, and
// regular expressions whose matches should be blocked.
const guardrails = {
  content: {
    // Subjects that must never appear
    prohibited: [
      'competitor-names',
      'internal-financials',
      'employee-salaries',
      'unreleased-products',
      'personal-opinions',
    ],

    // Topics to steer away from
    avoidTopics: [
      'politics',
      'religion',
      'controversial-subjects',
    ],

    // Text patterns to block
    blockedPatterns: [
      // "password", "secret" or "api key" followed by ':' or '=' (case-insensitive)
      /\b(password|secret|api.?key)\s*[:=]/i,
      // Digits grouped 3-2-4 with dashes (SSN pattern)
      /\b\d{3}-\d{2}-\d{4}\b/,
    ],
  },
}

// Check content before sending.
// NOTE(review): `message` is declared outside this snippet and is reassigned
// below, so it must be a mutable (`let`) binding — confirm at the call site.
const safe = await agent.checkContent(message)
if (!safe) {
  // Falsy check result: replace the message with the agent's sanitized version
  message = await agent.sanitize(message)
}

Required Content

// Content guardrails: disclaimers keyed by topic, plus per-channel signatures.
const guardrails = {
  content: {
    // Disclaimer text to include when the keyed topic is relevant
    required: {
      // Legal
      'legal-topics': 'This is not legal advice. Please consult a lawyer.',
      // Medical
      'medical-topics': 'This is not medical advice. Please consult a doctor.',
      // Financial
      'financial-topics': 'Past performance does not guarantee future results.',
    },

    // Signature text, keyed by channel
    signature: {
      // Two lines: sign-off, then the AI-generated notice
      email: '- Your AI Assistant\nThis message was generated by AI.',
      chat: '🤖 AI Assistant',
    },
  },
}

Approval Requirements

// Approval guardrails: which actions need sign-off, from whom, and how long
// to wait for it.
const guardrails = {
  approvals: {
    // One rule per action that needs approval
    required: [
      { action: 'send-external-email', approver: 'communications-team', timeout: '4h' },
      { action: 'modify-production-data', approver: 'engineering-lead', timeout: '1h' },
      // This rule additionally carries a condition
      { action: 'large-refund', condition: 'amount > 500', approver: 'finance-team', timeout: '24h' },
    ],

    // Behaviour when an approval times out.
    // One of: 'escalate' | 'deny' | 'auto-approve'
    onTimeout: 'escalate',
  },
}

// Request approval for sending an external email, attaching the draft and a
// human-readable reason.
// NOTE(review): `emailDraft` and `action` are not defined in this snippet —
// they are assumed to come from the surrounding code.
const approval = await agent.requestApproval({
  action: 'send-external-email',
  details: emailDraft,
  reason: 'Customer follow-up',
})

// Only execute once approval has been granted; nothing happens here otherwise
if (approval.granted) {
  await agent.execute(action)
}

Time Restrictions

// Schedule guardrails: operating hours and days, blackout periods, and the
// behaviour outside operating hours.
const guardrails = {
  schedule: {
    // Daily operating window
    activeHours: { start: '09:00', end: '18:00', timezone: 'America/New_York' },

    // Days of the week on which the agent operates
    activeDays: [
      'monday',
      'tuesday',
      'wednesday',
      'thursday',
      'friday',
    ],

    // Blackout periods, given as explicit dates or as a schedule string
    blackouts: [
      {
        name: 'holidays',
        dates: ['2024-12-25', '2024-01-01'],
      },
      {
        name: 'maintenance',
        schedule: 'sunday 02:00-06:00',
      },
    ],

    // Handling of requests outside operating hours
    outsideHours: {
      // One of: 'queue' | 'reject' | 'emergency-only'
      action: 'queue',
      message: 'I am currently offline. Your request will be handled tomorrow.',
    },
  },
}

Access Control

// Access-control guardrails: allow/deny lists for data sources, systems, and
// user-data fields.
const guardrails = {
  access: {
    // Data sources
    data: {
      canRead: [
        'public-docs',
        'team-resources',
      ],
      cannotRead: [
        'hr-records',
        'financial-data',
        'legal-documents',
      ],
    },

    // Systems
    systems: {
      allowed: [
        'ticketing',
        'documentation',
        'calendar',
      ],
      denied: [
        'billing',
        'admin-panel',
        'production-db',
      ],
    },

    // User-data fields
    userData: {
      canAccess: [
        'name',
        'email',
        'preferences',
      ],
      cannotAccess: [
        'ssn',
        'payment-info',
        'medical-records',
      ],
    },
  },
}

Output Validation

// Output-validation guardrails: validators per output type and the policy
// applied when validation fails.
const guardrails = {
  validation: {
    // Validators to run, listed per output type
    outputs: [
      { type: 'email', validators: ['no-pii', 'tone-check', 'brand-voice'] },
      { type: 'code', validators: ['security-scan', 'license-check'] },
      { type: 'customer-response', validators: ['sentiment', 'accuracy', 'completeness'] },
    ],

    // Failure policy.
    // One of: 'block' | 'warn' | 'block-and-alert'
    onFailure: 'block-and-alert',
  },
}

// Validate before sending.
// NOTE(review): `output` comes from the surrounding code. This snippet only
// raises an alert on failure; it does not itself stop the output from being
// sent — confirm whether blocking happens inside the agent.
const validation = await agent.validate(output)
if (!validation.passed) {
  // Report the failed validation together with the issues it found
  await agent.alert('validation-failed', validation.issues)
}

Escalation Rules

// Escalation guardrails: trigger conditions with their targets, the ordered
// escalation path, and the attempt cap.
const guardrails = {
  escalation: {
    // Conditions that escalate automatically, each with its destination
    triggers: [
      {
        condition: 'confidence < 0.5',
        to: 'senior-support',
      },
      {
        condition: 'customer.sentiment = very-negative',
        to: 'manager',
      },
      {
        condition: 'topic matches legal|lawsuit|attorney',
        to: 'legal-team',
      },
      {
        condition: 'error-rate > 0.1',
        to: 'engineering',
      },
    ],

    // Ordered escalation path
    path: [
      'tier-1',
      'tier-2',
      'manager',
      'director',
    ],

    // Attempts allowed before escalating
    maxAttempts: 3,
  },
}

Audit and Compliance

// Audit and compliance guardrails: logging policy, retention, regulatory
// frameworks, data residency, and encryption settings.
const guardrails = {
  audit: {
    // Record every action
    logAll: true,

    // Sensitive actions that get detailed logging
    detailed: [
      'modify-data',
      'send-external',
      'access-pii',
    ],

    // Log retention period: 7 years, for compliance
    retention: '7y',

    // Fields every log entry must carry
    requiredFields: [
      'timestamp',
      'action',
      'actor',
      'target',
      'outcome',
    ],
  },

  compliance: {
    // Regulatory frameworks in scope
    frameworks: [
      'SOC2',
      'GDPR',
      'HIPAA',
    ],

    // Data residency
    dataResidency: 'US',

    // Encryption requirements
    encryption: { atRest: true, inTransit: true, keyRotation: '90d' },
  },
}

Emergency Controls

// Emergency stop: halt the agent, passing a reason and asking for its state
// to be preserved.
await agent.emergencyStop({
  reason: 'Detected anomalous behavior',
  preserveState: true,
})

// Pause agent for a fixed duration, with a reason
await agent.pause({
  duration: '1h',
  reason: 'Investigating reported issue',
})

// Resume with restrictions: the listed restrictions are passed on resume
await agent.resume({
  restrictions: ['read-only', 'no-external-communication'],
})

// Kill switch: register an async handler that shuts the agent down
// gracefully first, then notifies admins.
// NOTE(review): when the handler fires is determined by the agent library —
// confirm the trigger conditions.
agent.onKillSwitch(async () => {
  await agent.gracefulShutdown()
  await agent.notifyAdmins('Kill switch activated')
})

Monitoring and Alerts

// Monitoring guardrails: tracked metrics, threshold rules, and alert
// destinations.
const guardrails = {
  monitoring: {
    // Metrics being tracked
    metrics: [
      'action-rate',
      'error-rate',
      'response-time',
      'escalation-rate',
    ],

    // Threshold rules: metric, threshold value, and the action to take
    alerts: [
      {
        metric: 'error-rate',
        threshold: 0.05,
        action: 'alert',
      },
      {
        metric: 'error-rate',
        threshold: 0.1,
        action: 'pause',
      },
      {
        metric: 'action-rate',
        threshold: 200,
        action: 'throttle',
      },
    ],

    // Where alerts are delivered
    alertTo: [
      'ops-team@company.com',
      '#agent-alerts',
    ],
  },
}

Best Practices

  1. Defense in depth - Multiple layers of guardrails
  2. Start restrictive - Loosen limits as trust builds
  3. Clear escalation - Always have human fallback
  4. Comprehensive logging - Audit all actions
  5. Regular review - Adjust guardrails based on incidents
  6. Emergency controls - Always have a kill switch
Was this page helpful?

On this page