Complete Pipeline
Copy
ensemble: document-pipeline
agents:
# Extract text
- name: extract
operation: think
config:
provider: openai
model: gpt-4o
image: ${input.document_url}
prompt: Extract all text from this document
# Classify document type
- name: classify
operation: think
config:
prompt: |
Classify this document:
- invoice
- receipt
- contract
- resume
- other
Text: ${extract.output}
Return JSON: { "type": string, "confidence": number }
# Extract structured data
- name: extract-invoice
condition: ${JSON.parse(classify.output).type === 'invoice'}
operation: think
config:
prompt: |
Extract invoice data:
${extract.output}
Return JSON: {
"invoice_number": string,
"date": string,
"vendor": string,
"total": number,
"line_items": []
}
# Validate extracted data
- name: validate
operation: validate
inputs:
data: ${extract-invoice.output}
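schema: ${[email protected]}  # NOTE(review): "[email protected]" is an email-obfuscation artifact from page extraction; the original was presumably a versioned schema reference (name@version) - restore it from the upstream docs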
# Store in database
- name: store
condition: ${validate.output.valid}
operation: data
config:
backend: d1
binding: DB
operation: execute
sql: INSERT INTO invoices ...
params: ${validate.output.data}
Invoice Processing
Copy
ensemble: process-invoice
agents:
# OCR + extraction
- name: extract
operation: think
config:
provider: openai
model: gpt-4o
image: ${input.invoice_image}
prompt: |
Extract invoice information:
- Invoice number
- Date
- Vendor name and address
- Line items (description, quantity, price)
- Subtotal, tax, total
Return as JSON.
# Validate amounts
- name: validate-math
operation: code
config:
script: scripts/validate-invoice-math
input:
invoice: ${extract.output}
```typescript
// scripts/validate-invoice-math.ts
import type { AgentExecutionContext } from '@ensemble-edge/conductor'
/**
 * Checks that an extracted invoice's line items add up to its stated subtotal.
 *
 * Reads `context.input.invoice`, which may be a JSON string or an
 * already-parsed object. Every line item contributes `quantity * price`; the
 * sum is compared with `subtotal` using an absolute tolerance of 0.01.
 * A missing or non-array `line_items` is treated as an empty list.
 *
 * @param context - Execution context; `input.invoice` carries the invoice.
 * @returns `valid` (sum within 0.01 of `subtotal`), `calculated_total` (the
 *   computed sum) and `invoice_total` (the `subtotal` value as found).
 * @throws SyntaxError when `invoice` is a string that is not valid JSON.
 */
export default function validateInvoiceMath(context: AgentExecutionContext) {
  const { invoice } = context.input

  // Only parse when we were handed text; JSON.parse on an object would throw.
  const invoiceData = typeof invoice === 'string' ? JSON.parse(invoice) : invoice

  const lineItems: unknown[] = Array.isArray(invoiceData?.line_items)
    ? invoiceData.line_items
    : []

  const lineTotal = lineItems.reduce<number>((sum, item) => {
    const { quantity, price } = (item ?? {}) as { quantity?: unknown; price?: unknown }
    // Number() applies the same coercion `*` would; absent values become NaN,
    // which makes the comparison below fail instead of silently passing.
    return sum + Number(quantity) * Number(price)
  }, 0)

  const subtotal = invoiceData?.subtotal

  return {
    valid: Math.abs(lineTotal - Number(subtotal)) < 0.01,
    calculated_total: lineTotal,
    invoice_total: subtotal
  }
}
```
Copy
# Check vendor
# Looks up the extracted vendor name in the approved_vendors table.
- name: check-vendor
  operation: data
  config:
    backend: d1
    binding: DB
    operation: query
    sql: SELECT * FROM approved_vendors WHERE name = ?
    # Block sequence on purpose: inside a flow sequence ([...]) the braces of
    # a ${...} template are YAML flow indicators and break parsing.
    params:
      - ${JSON.parse(extract.output).vendor}
# Require approval for new vendors
- name: approve-vendor
condition: ${check-vendor.output.length === 0}
operation: hitl
inputs:
data: ${JSON.parse(extract.output)}
prompt: "New vendor - approve invoice?"
approvers: [approver@example.com]  # placeholder address; the original was replaced by email obfuscation during extraction
# Auto-approve known vendors under threshold
- name: auto-approve
condition: ${check-vendor.output.length > 0 && JSON.parse(extract.output).total < 5000}
operation: code
config:
script: scripts/return-auto-approval
Copy
// scripts/return-auto-approval.ts
import type { AgentExecutionContext } from '@ensemble-edge/conductor'
/**
 * Produces the fixed approval record used by the `auto-approve` step.
 *
 * @param context - Execution context supplied by the caller; not read here.
 * @returns An object with `approved` and `auto` both set to `true`.
 */
export default function returnAutoApproval(context: AgentExecutionContext) {
  const autoApproval = { approved: true, auto: true }
  return autoApproval
}
Copy
# Store invoice
- name: store
condition: ${auto-approve.executed || approve-vendor.output.approved}
operation: data
config:
backend: d1
binding: DB
operation: execute
sql: INSERT INTO invoices ...
Resume Screening
Copy
ensemble: screen-resume
agents:
# Extract resume data
- name: extract
operation: think
config:
provider: openai
model: gpt-4o
prompt: |
Extract from resume:
${input.resume_text}
Return JSON: {
"name": string,
"email": string,
"phone": string,
"experience_years": number,
"skills": [string],
"education": [{ "degree": string, "school": string, "year": number }],
"work_history": [{ "company": string, "title": string, "duration": string }]
}
# Score against job requirements
- name: score
operation: think
config:
provider: openai
model: gpt-4o
prompt: |
Score resume fit (0-100) for this position:
Requirements: ${input.job_requirements}
Resume: ${extract.output}
Return JSON: {
"overall_score": number,
"scores": {
"experience": number,
"skills": number,
"education": number
},
"strengths": [string],
"gaps": [string],
"recommendation": "interview" | "maybe" | "reject"
}
# Shortlist high scores
- name: shortlist
condition: ${JSON.parse(score.output).overall_score >= 70}
operation: data
config:
backend: d1
binding: DB
operation: execute
sql: |
INSERT INTO shortlisted_candidates (name, email, score, data, job_id)
VALUES (?, ?, ?, ?, ?)
params:
- ${JSON.parse(extract.output).name}
- ${JSON.parse(extract.output).email}
- ${JSON.parse(score.output).overall_score}
- ${extract.output}
- ${input.job_id}
# Auto-reject low scores
- name: auto-reject
condition: ${JSON.parse(score.output).overall_score < 50}
operation: email
config:
to: ${JSON.parse(extract.output).email}
subject: "Application Update"
body: |
Thank you for your interest...
Contract Analysis
Copy
ensemble: analyze-contract
agents:
# Extract key terms
- name: extract-terms
operation: think
config:
provider: openai
model: gpt-4o
prompt: |
Extract key contract terms:
${input.contract_text}
Return JSON: {
"parties": [string],
"effective_date": string,
"termination_date": string,
"payment_terms": string,
"termination_clause": string,
"liability_cap": string
}
# Identify risks
- name: identify-risks
operation: think
config:
provider: openai
model: gpt-4o
prompt: |
Identify risks in this contract:
${input.contract_text}
Return JSON: {
"risks": [
{
"category": "financial" | "legal" | "operational",
"severity": "high" | "medium" | "low",
"description": string,
"recommendation": string
}
]
}
# Compare to standard template
- name: compare-template
operation: think
config:
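# NOTE(review): the "[email protected]" token in the Template line of this prompt is an email-obfuscation artifact from page extraction; the original was presumably a versioned template reference (name@version) - restore it from the upstream docs
prompt: |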
Compare contract to our standard template:
Contract: ${input.contract_text}
Template: ${[email protected]}
Identify deviations.
# Legal review for high-risk
- name: legal-review
condition: ${JSON.parse(identify-risks.output).risks.some(r => r.severity === 'high')}
operation: hitl
inputs:
data:
terms: ${extract-terms.output}
risks: ${identify-risks.output}
deviations: ${compare-template.output}
prompt: "Contract has high-risk clauses - legal review required"
approvers: [approver@example.com]  # placeholder address; the original was replaced by email obfuscation during extraction
Form Processing
Copy
ensemble: process-form
agents:
# Extract form fields
- name: extract
operation: think
config:
provider: openai
model: gpt-4o
image: ${input.form_image}
prompt: |
Extract all form fields and values.
Return as JSON object.
# Validate completeness
- name: check-complete
operation: code
config:
script: scripts/check-form-completeness
input:
form_data: ${extract.output}
required_fields: ${input.required_fields}
```typescript
// scripts/check-form-completeness.ts
import type { AgentExecutionContext } from '@ensemble-edge/conductor'
/**
 * Reports which required fields are absent from an extracted form.
 *
 * Reads `context.input.form_data` (a JSON string or an already-parsed object)
 * and `context.input.required_fields` (a list of field names). A field counts
 * as missing when the form has no own property of that name, or its value is
 * `null`, `undefined`, or a blank string. Values such as `0` and `false` are
 * real answers and count as present.
 *
 * @param context - Execution context carrying `form_data` and `required_fields`.
 * @returns `complete` (true when nothing is missing) and `missing_fields`
 *   (the required names that were not filled in, in their original order).
 * @throws SyntaxError when `form_data` is a string that is not valid JSON.
 */
export default function checkFormCompleteness(context: AgentExecutionContext) {
  const { form_data, required_fields } = context.input

  // Only parse when we were handed text; JSON.parse on an object would throw.
  const parsed: unknown = typeof form_data === 'string' ? JSON.parse(form_data) : form_data
  const form: Record<string, unknown> =
    parsed !== null && typeof parsed === 'object' ? (parsed as Record<string, unknown>) : {}

  // No list of required fields means nothing can be missing.
  const required: string[] = Array.isArray(required_fields) ? required_fields : []

  const missing = required.filter((field) => {
    // Own-property check so names like "constructor" are not satisfied by the prototype.
    const value = Object.prototype.hasOwnProperty.call(form, field) ? form[field] : undefined
    return value == null || (typeof value === 'string' && value.trim() === '')
  })

  return {
    complete: missing.length === 0,
    missing_fields: missing
  }
}
```
Copy
# Request missing info
- name: request-info
condition: ${!check-complete.output.complete}
operation: email
config:
to: ${input.submitter_email}
subject: "Form incomplete"
body: |
Please provide:
${check-complete.output.missing_fields.join('\n')}
# Process complete forms
- name: process
condition: ${check-complete.output.complete}
operation: data
config:
backend: d1
binding: DB
operation: execute
sql: INSERT INTO form_submissions ...
Best Practices
1. Vision Models for Images
Copy
config:
provider: openai
model: gpt-4o # Supports vision
image: ${input.document_url}
2. Request Structured Output
Copy
prompt: |
Extract data.
Return JSON: { "field1": type, "field2": type }
3. Validate Extracted Data
Copy
- name: validate
operation: validate
inputs:
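schema: ${[email protected]}  # NOTE(review): "[email protected]" is an email-obfuscation artifact from page extraction; the original was presumably a versioned schema reference (name@version) - restore it from the upstream docs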
4. Human Review for Low-Confidence or High-Value Documents
Copy
- name: review
condition: ${confidence < 0.8 || high_value}
operation: hitl
5. Log Processing Results
Copy
- name: log
operation: data
config:
backend: d1
binding: DB
operation: execute
sql: INSERT INTO processing_log ...

