Document Intelligence

Complete Pipeline

# Ensemble: read a document image, classify it, and - for invoices only -
# extract structured fields, validate them, and store the result.
ensemble: document-pipeline

agents:
  # Extract text from the document image.
  - name: extract
    operation: think
    config:
      provider: openai
      model: gpt-4o
      image: ${input.document_url}
      prompt: Extract all text from this document

  # Classify document type from the extracted text.
  # NOTE(review): no provider/model is set here - presumably a default
  # applies; confirm.
  - name: classify
    operation: think
    config:
      prompt: |
        Classify this document:
        - invoice
        - receipt
        - contract
        - resume
        - other

        Text: ${extract.output}
        Return JSON: { "type": string, "confidence": number }

  # Extract structured data; skipped unless the classifier answered "invoice".
  - name: extract-invoice
    condition: ${JSON.parse(classify.output).type === 'invoice'}
    operation: think
    config:
      prompt: |
        Extract invoice data:
        ${extract.output}

        Return JSON: {
          "invoice_number": string,
          "date": string,
          "vendor": string,
          "total": number,
          "line_items": []
        }

  # Validate extracted data.
  # Guarded so it only runs when extract-invoice ran; without the guard a
  # non-invoice document would be validated against a missing output.
  - name: validate
    condition: ${extract-invoice.executed}
    operation: validate
    inputs:
      data: ${extract-invoice.output}
      schema: ${component.invoice-schema@v1.0.0}

  # Store in database.
  # validate may have been skipped, so confirm it ran before reading its
  # output.
  - name: store
    condition: ${validate.executed && validate.output.valid}
    operation: data
    config:
      backend: d1
      binding: DB
      operation: execute
      sql: INSERT INTO invoices ...
      params: ${validate.output.data}

Invoice Processing

# Ensemble: pull invoice fields out of an image, then check the arithmetic.
ensemble: process-invoice

agents:
  # Step 1 - extract the invoice fields from the image, returned as JSON.
  - name: extract
    operation: think
    config:
      provider: openai
      model: gpt-4o
      image: "${input.invoice_image}"
      prompt: |
        Extract invoice information:
        - Invoice number
        - Date
        - Vendor name and address
        - Line items (description, quantity, price)
        - Subtotal, tax, total

        Return as JSON.

  # Step 2 - pass the extracted JSON to the validate-invoice-math script.
  - name: validate-math
    operation: code
    config:
      script: scripts/validate-invoice-math
    input:
      invoice: "${extract.output}"

```typescript
// scripts/validate-invoice-math.ts
import type { AgentExecutionContext } from '@ensemble-edge/conductor'

/**
 * Checks that an invoice's line items add up to its stated subtotal.
 *
 * Reads `context.input.invoice`, which may be a JSON string or an
 * already-parsed object.
 *
 * @param context - Execution context; only `context.input.invoice` is read.
 * @returns `valid` is true when the summed line items are within 0.01 of
 *   the invoice's `subtotal`; `calculated_total` is that sum;
 *   `invoice_total` echoes the invoice's `subtotal` field.
 * @throws SyntaxError when `invoice` is a string that is not valid JSON.
 */
export default function validateInvoiceMath(context: AgentExecutionContext) {
  const { invoice } = context.input
  // Only parse when given a string, so a pre-parsed object is accepted too.
  const invoiceData = typeof invoice === 'string' ? JSON.parse(invoice) : invoice

  // A missing or non-array `line_items` counts as "no line items" instead of
  // throwing on `.reduce`.
  const lineItems: any[] = Array.isArray(invoiceData?.line_items)
    ? invoiceData.line_items
    : []

  const lineTotal = lineItems.reduce(
    (sum: number, item: any) => sum + Number(item?.quantity) * Number(item?.price),
    0
  )

  // An absent subtotal makes the difference NaN, so `valid` is false.
  const subtotal = invoiceData?.subtotal

  return {
    valid: Math.abs(lineTotal - subtotal) < 0.01,
    calculated_total: lineTotal,
    invoice_total: subtotal
  }
}
```

  # Check vendor: look the extracted vendor name up in approved_vendors.
  - name: check-vendor
    operation: data
    config:
      backend: d1
      binding: DB
      operation: query
      sql: SELECT * FROM approved_vendors WHERE name = ?
      # Written as a block sequence with a quoted scalar: inside a flow
      # sequence ([...]) the braces of ${...} are flow indicators and the
      # expression does not parse as a single string.
      params:
        - "${JSON.parse(extract.output).vendor}"

  # Require approval for new vendors (no row matched in check-vendor).
  - name: approve-vendor
    condition: ${check-vendor.output.length === 0}
    operation: hitl
    inputs:
      data: ${JSON.parse(extract.output)}
      prompt: "New vendor - approve invoice?"
      approvers: [finance@example.com]

  # Auto-approve known vendors under threshold.
  # NOTE(review): a known vendor with total >= 5000 matches neither this
  # agent nor approve-vendor - confirm that is intended.
  - name: auto-approve
    condition: ${check-vendor.output.length > 0 && JSON.parse(extract.output).total < 5000}
    operation: code
    config:
      script: scripts/return-auto-approval

// scripts/return-auto-approval.ts
import type { AgentExecutionContext } from '@ensemble-edge/conductor'

/**
 * Produces the fixed result for an automatically approved invoice.
 * The execution context is accepted but not read.
 *
 * @returns An object with `approved` and `auto` both set to true.
 */
export default function returnAutoApproval(context: AgentExecutionContext) {
  const decision = { approved: true, auto: true }
  return decision
}

  # Store invoice.
  # Runs only when the line-item arithmetic passed and one of the two
  # approval paths succeeded. approve-vendor.output is read only after
  # confirming that agent ran, because it is skipped for known vendors.
  - name: store
    condition: >-
      ${validate-math.output.valid &&
      (auto-approve.executed ||
      (approve-vendor.executed && approve-vendor.output.approved))}
    operation: data
    config:
      backend: d1
      binding: DB
      operation: execute
      sql: INSERT INTO invoices ...

Resume Screening

# Ensemble: parse a resume, score it against the job requirements, then
# shortlist or reject based on the score.
ensemble: screen-resume

agents:
  # Step 1 - turn the resume text into structured JSON.
  - name: extract
    operation: think
    config:
      provider: openai
      model: gpt-4o
      prompt: |
        Extract from resume:
        ${input.resume_text}

        Return JSON: {
          "name": string,
          "email": string,
          "phone": string,
          "experience_years": number,
          "skills": [string],
          "education": [{ "degree": string, "school": string, "year": number }],
          "work_history": [{ "company": string, "title": string, "duration": string }]
        }

  # Step 2 - rate the structured resume against the supplied requirements.
  - name: score
    operation: think
    config:
      provider: openai
      model: gpt-4o
      prompt: |
        Score resume fit (0-100) for this position:

        Requirements: ${input.job_requirements}

        Resume: ${extract.output}

        Return JSON: {
          "overall_score": number,
          "scores": {
            "experience": number,
            "skills": number,
            "education": number
          },
          "strengths": [string],
          "gaps": [string],
          "recommendation": "interview" | "maybe" | "reject"
        }

  # Step 3a - candidates scoring 70 or more are written to the shortlist.
  - name: shortlist
    condition: "${JSON.parse(score.output).overall_score >= 70}"
    operation: data
    config:
      backend: d1
      binding: DB
      operation: execute
      sql: |
        INSERT INTO shortlisted_candidates (name, email, score, data, job_id)
        VALUES (?, ?, ?, ?, ?)
      # One entry per placeholder, in column order.
      params:
        - "${JSON.parse(extract.output).name}"
        - "${JSON.parse(extract.output).email}"
        - "${JSON.parse(score.output).overall_score}"
        - "${extract.output}"
        - "${input.job_id}"

  # Step 3b - candidates scoring below 50 are emailed a rejection.
  - name: auto-reject
    condition: "${JSON.parse(score.output).overall_score < 50}"
    operation: email
    config:
      to: "${JSON.parse(extract.output).email}"
      subject: 'Application Update'
      body: |
        Thank you for your interest...

Contract Analysis

# Ensemble: summarize a contract's key terms, list its risks, compare it to
# the standard template, and escalate to legal when a high risk is found.
ensemble: analyze-contract

agents:
  # Pull the headline terms out of the contract text.
  - name: extract-terms
    operation: think
    config:
      provider: openai
      model: gpt-4o
      prompt: |
        Extract key contract terms:
        ${input.contract_text}

        Return JSON: {
          "parties": [string],
          "effective_date": string,
          "termination_date": string,
          "payment_terms": string,
          "termination_clause": string,
          "liability_cap": string
        }

  # List risks with a category, severity, description and recommendation.
  - name: identify-risks
    operation: think
    config:
      provider: openai
      model: gpt-4o
      prompt: |
        Identify risks in this contract:
        ${input.contract_text}

        Return JSON: {
          "risks": [
            {
              "category": "financial" | "legal" | "operational",
              "severity": "high" | "medium" | "low",
              "description": string,
              "recommendation": string
            }
          ]
        }

  # Diff the contract against the versioned standard-contract component.
  - name: compare-template
    operation: think
    config:
      prompt: |
        Compare contract to our standard template:

        Contract: ${input.contract_text}
        Template: ${component.standard-contract@v1.0.0}

        Identify deviations.

  # Human review, triggered when any identified risk has severity "high".
  - name: legal-review
    condition: "${JSON.parse(identify-risks.output).risks.some(r => r.severity === 'high')}"
    operation: hitl
    inputs:
      data:
        terms: "${extract-terms.output}"
        risks: "${identify-risks.output}"
        deviations: "${compare-template.output}"
      prompt: 'Contract has high-risk clauses - legal review required'
      approvers:
        - legal@example.com

Form Processing

# Ensemble: read a form image and check that the required fields are filled.
ensemble: process-form

agents:
  # Step 1 - extract every field/value pair from the form image as JSON.
  - name: extract
    operation: think
    config:
      provider: openai
      model: gpt-4o
      image: "${input.form_image}"
      prompt: |
        Extract all form fields and values.
        Return as JSON object.

  # Step 2 - pass the extracted fields and the caller's required-field list
  # to the check-form-completeness script.
  - name: check-complete
    operation: code
    config:
      script: scripts/check-form-completeness
    input:
      form_data: "${extract.output}"
      required_fields: "${input.required_fields}"

```typescript
// scripts/check-form-completeness.ts
import type { AgentExecutionContext } from '@ensemble-edge/conductor'

/**
 * Reports which required fields are absent from an extracted form.
 *
 * Reads `context.input.form_data` (a JSON string or an already-parsed
 * object) and `context.input.required_fields` (a list of field names).
 * A field counts as missing when the form has no own property of that name,
 * or its value is null, undefined, or a blank string. Values such as `0`
 * and `false` are real answers and are not treated as missing.
 *
 * @param context - Execution context; only `context.input` is read.
 * @returns `complete` is true when nothing is missing; `missing_fields`
 *   lists the missing names in the order they appear in `required_fields`.
 * @throws SyntaxError when `form_data` is a string that is not valid JSON.
 */
export default function checkFormCompleteness(context: AgentExecutionContext) {
  const { form_data, required_fields } = context.input
  // Only parse when given a string, so a pre-parsed object is accepted too.
  const parsed = typeof form_data === 'string' ? JSON.parse(form_data) : form_data
  // Anything that is not an object (null, primitives) has no named fields.
  const form: Record<string, unknown> =
    parsed !== null && typeof parsed === 'object' ? parsed : {}
  // No required list means nothing can be missing.
  const required: string[] = Array.isArray(required_fields) ? required_fields : []

  const isBlank = (value: unknown): boolean =>
    value === undefined ||
    value === null ||
    (typeof value === 'string' && value.trim() === '')

  // The own-property check keeps inherited names such as "constructor" from
  // being mistaken for filled-in fields.
  const missing = required.filter(
    (field: string) =>
      !Object.prototype.hasOwnProperty.call(form, field) || isBlank(form[field])
  )

  return {
    complete: missing.length === 0,
    missing_fields: missing
  }
}
```

  # Incomplete form: email the submitter the list of missing fields.
  - name: request-info
    condition: "${!check-complete.output.complete}"
    operation: email
    config:
      to: "${input.submitter_email}"
      subject: 'Form incomplete'
      body: |
        Please provide:
        ${check-complete.output.missing_fields.join('\n')}

  # Complete form: write the submission to the database.
  - name: process
    condition: "${check-complete.output.complete}"
    operation: data
    config:
      backend: d1
      binding: DB
      operation: execute
      sql: 'INSERT INTO form_submissions ...'

Best Practices

1. Vision Models for Images

config:
  provider: openai
  # gpt-4o supports vision, so it can take the image below.
  model: gpt-4o
  image: "${input.document_url}"

2. Structured Output

# Spell out the expected JSON shape in the prompt itself.
prompt: |
  Extract data.
  Return JSON: { "field1": type, "field2": type }

3. Validation

# Check extracted data against a versioned schema component.
- name: validate
  operation: validate
  inputs:
    schema: "${component.schema@v1.0.0}"

4. Human Review

# Route to a human when confidence is below 0.8 or the item is high value.
- name: review
  condition: "${confidence < 0.8 || high_value}"
  operation: hitl

5. Audit Trail

# Record each processing step in the processing_log table.
- name: log
  operation: data
  config:
    backend: d1
    binding: DB
    operation: execute
    sql: 'INSERT INTO processing_log ...'

Next Steps

think Operation

Vision models

Validate Operation

Data validation

HITL Approval

Human review

Data Processing

ETL workflows

Conductor

Getting Started

Core Concepts

Building

Components

Operations Reference

Plugins

Starter Kit

Playbooks

Reference

Complete Pipeline

Invoice Processing

Resume Screening

Contract Analysis

Form Processing

Best Practices

Next Steps

think Operation

Validate Operation

HITL Approval

Data Processing

Conductor

Getting Started

Core Concepts

Building

Components

Operations Reference

Plugins

Starter Kit

Playbooks

Reference

Complete Pipeline

Invoice Processing

Resume Screening

Contract Analysis

Form Processing

Best Practices

Next Steps

think Operation

Validate Operation

HITL Approval

Data Processing

Complete Pipeline

Invoice Processing

Resume Screening

Contract Analysis

Form Processing

Best Practices

Next Steps