ensemble: moderate-content
description: Multi-step content moderation that checks for explicit content, hate speech, and spam, then aggregates the flags
agents:
  # Step 1: Check for explicit content
  - name: check-explicit
    operation: think
    config:
      provider: openai
      model: gpt-4o-mini
      prompt: |
        Analyze this text for explicit content: "${input.text}"
        Return JSON: {"explicit": true/false, "confidence": 0-1, "reason": ""}

  # Step 2: Check for hate speech
  - name: check-hate
    operation: think
    config:
      provider: openai
      model: gpt-4o-mini
      prompt: |
        Analyze this text for hate speech: "${input.text}"
        Return JSON: {"hate_speech": true/false, "confidence": 0-1, "reason": ""}

  # Step 3: Check for spam
  - name: check-spam
    operation: think
    config:
      provider: openai
      model: gpt-4o-mini
      prompt: |
        Analyze this text for spam: "${input.text}"
        Return JSON: {"spam": true/false, "confidence": 0-1, "reason": ""}

  # Step 4: Aggregate results
  - name: aggregate
    operation: code
    config:
      code: |
        // Booleans parsed from each check's JSON output
        const explicit = ${check-explicit.output.explicit};
        const hate = ${check-hate.output.hate_speech};
        const spam = ${check-spam.output.spam};
        return {
          safe: !explicit && !hate && !spam,
          flags: {
            explicit,
            hate_speech: hate,
            spam
          },
          // Reason strings must be quoted when interpolated; filter() drops empty ones
          reasons: [
            "${check-explicit.output.reason}",
            "${check-hate.output.reason}",
            "${check-spam.output.reason}"
          ].filter(r => r)
        };
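
# Map the aggregate step's result to the ensemble's final output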
output:
  safe: ${aggregate.output.safe}
  flags: ${aggregate.output.flags}
  reasons: ${aggregate.output.reasons}
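
# Illustrative shapes only; actual values come from each model's JSON response.
# A single check (per its prompt) returns something like
#   {"explicit": false, "confidence": 0.95, "reason": ""}
# and a clean input would then produce a final ensemble output like:
#   safe: true
#   flags: { explicit: false, hate_speech: false, spam: false }
#   reasons: []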