\documentclass[11pt]{article}
\usepackage[margin=1in]{geometry}
\usepackage{booktabs}
\usepackage{hyperref}
\title{Document Processing Benchmark Notes}
\author{Nadia Keller}
\date{March 2026}
\begin{document}
\maketitle
\begin{abstract}
We compared structured extraction, markdown conversion, and spreadsheet generation workflows across invoices, scanned warehouse sheets, and compliance packets.
\end{abstract}
\section{Scope}
The benchmark covered 120 source files spanning PDF, DOCX, JPEG, and LaTeX inputs. We measured field accuracy, table retention, and handoff effort for downstream automation.
\section{Summary Table}
\begin{tabular}{lrr}
\toprule
Workflow & Accuracy & Median Runtime \\
\midrule
Invoice extraction & 98.4\% & 1.8s \\
Markdown conversion & 96.9\% & 1.2s \\
Sheet generation & 100.0\% & 0.7s \\
\bottomrule
\end{tabular}
\section{Key Findings}
Structured document APIs reduce glue code, preserve tabular content better than OCR-only pipelines, and shorten review time for finance operations.
\begin{itemize}
\item OCR-only pipelines lost row groupings in 17\% of warehouse tables.
\item Markdown output remained suitable for LLM ingestion without custom cleanup.
\item Spreadsheet generation removed a manual CSV reformatting step from the finance workflow.
\end{itemize}
\section{Next Steps}
Extend the benchmark to receipts, insurance packets, and multi-file extraction. Publish the evaluation harness after the April review.
\end{document}
Output Preview
title
Document Processing Benchmark Notes
author
Nadia Keller
abstract
We compared structured extraction, markdown conversion, and spreadsheet generation workflows across invoices, scanned warehouse sheets, and compliance packets.
benchmark_scope
120 files across PDF, DOCX, JPEG, and LaTeX inputs
key_findings
Structured document APIs reduce glue code, preserve tabular content better than OCR-only pipelines, and shorten review time for finance operations
next_step
Extend the benchmark to receipts, insurance packets, and multi-file extraction. Publish the evaluation harness after the April review.
{"success":true,"data":{"invoice_number":{"type":"TEXT","value":"INV-2024-0042","confidence":0.97,"citations":["Invoice #INV-2024-0042"],"source":"accounts-payable-invoice.pdf"},"vendor":{"type":"TEXT","value":"Northwind Accounting Services GmbH","confidence":0.98,"citations":["Northwind Accounting Services GmbH"],"source":"accounts-payable-invoice.pdf"},"due_date":{"type":"DATE","value":"2024-04-14","confidence":0.96,"citations":["Due Date: 2024-04-14"],"source":"accounts-payable-invoice.pdf"},"line_items":{"type":"ARRAY","value":[{"description":{"value":"Month-end close automation workshop","confidence":0.98,"citations":["Month-end close automation workshop"]},"amount":{"value":720.00,"confidence":0.96,"citations":["USD 720.00"]}},{"description":{"value":"Invoice schema rollout and testing","confidence":0.97,"citations":["Invoice schema rollout and testing"]},"amount":{"value":480.00,"confidence":0.95,"citations":["USD 480.00"]}},{"description":{"value":"Vendor onboarding playbook update","confidence":0.95,"citations":["Vendor onboarding playbook update"]},"amount":{"value":190.00,"confidence":0.94,"citations":["USD 190.00"]}}],"confidence":0.97,"citations":[],"source":"accounts-payable-invoice.pdf"},"total_due":{"type":"CURRENCY_AMOUNT","value":1390.00,"confidence":0.97,"citations":["Total Due: USD 1,390.00"],"source":"accounts-payable-invoice.pdf"}}}
Request
// Request structured invoice fields from a PDF referenced by URL.
import { IterationLayer } from "iterationlayer";

const client = new IterationLayer({
  apiKey: "YOUR_API_KEY",
});

// The schema lists each field to extract with its type, name, and description;
// `line_items` is an ARRAY field whose entries carry their own nested fields.
const result = await client.extractDocument({
  files: [
    {
      type: "url",
      name: "accounts-payable-invoice.pdf",
      url: "https://iterationlayer.com/code-samples/accounts-payable-invoice.pdf",
    }
  ],
  schema: {
    fields: [
      {
        type: "TEXT",
        name: "invoice_number",
        description: "The invoice number",
      },
      {
        type: "TEXT",
        name: "vendor",
        description: "The vendor legal name",
      },
      {
        type: "DATE",
        name: "due_date",
        description: "The invoice due date",
      },
      {
        type: "ARRAY",
        name: "line_items",
        description: "Line items",
        fields: [
          { type: "TEXT", name: "description", description: "Line item description" },
          { type: "CURRENCY_AMOUNT", name: "amount", description: "Line item amount" },
        ],
      },
      {
        type: "CURRENCY_AMOUNT",
        name: "total_due",
        description: "The final amount due",
      },
    ],
  },
});
Response
{"success":true,"data":{"invoice_number":{"type":"TEXT","value":"INV-2024-0042","confidence":0.97,"citations":["Invoice #INV-2024-0042"],"source":"accounts-payable-invoice.pdf"},"vendor":{"type":"TEXT","value":"Northwind Accounting Services GmbH","confidence":0.98,"citations":["Northwind Accounting Services GmbH"],"source":"accounts-payable-invoice.pdf"},"due_date":{"type":"DATE","value":"2024-04-14","confidence":0.96,"citations":["Due Date: 2024-04-14"],"source":"accounts-payable-invoice.pdf"},"line_items":{"type":"ARRAY","value":[{"description":{"value":"Month-end close automation workshop","confidence":0.98,"citations":["Month-end close automation workshop"]},"amount":{"value":720.00,"confidence":0.96,"citations":["USD 720.00"]}},{"description":{"value":"Invoice schema rollout and testing","confidence":0.97,"citations":["Invoice schema rollout and testing"]},"amount":{"value":480.00,"confidence":0.95,"citations":["USD 480.00"]}},{"description":{"value":"Vendor onboarding playbook update","confidence":0.95,"citations":["Vendor onboarding playbook update"]},"amount":{"value":190.00,"confidence":0.94,"citations":["USD 190.00"]}}],"confidence":0.97,"citations":[],"source":"accounts-payable-invoice.pdf"},"total_due":{"type":"CURRENCY_AMOUNT","value":1390.00,"confidence":0.97,"citations":["Total Due: USD 1,390.00"],"source":"accounts-payable-invoice.pdf"}}}
Request
# Request structured invoice fields from a PDF referenced by URL.
from iterationlayer import IterationLayer

client = IterationLayer(api_key="YOUR_API_KEY")

# The schema lists each field to extract with its type, name, and description;
# "line_items" is an ARRAY field whose entries carry their own nested fields.
result = client.extract_document(
    files=[
        {
            "type": "url",
            "name": "accounts-payable-invoice.pdf",
            "url": "https://iterationlayer.com/code-samples/accounts-payable-invoice.pdf",
        }
    ],
    schema={
        "fields": [
            {
                "type": "TEXT",
                "name": "invoice_number",
                "description": "The invoice number",
            },
            {
                "type": "TEXT",
                "name": "vendor",
                "description": "The vendor legal name",
            },
            {
                "type": "DATE",
                "name": "due_date",
                "description": "The invoice due date",
            },
            {
                "type": "ARRAY",
                "name": "line_items",
                "description": "Line items",
                "fields": [
                    {"type": "TEXT", "name": "description", "description": "Line item description"},
                    {"type": "CURRENCY_AMOUNT", "name": "amount", "description": "Line item amount"},
                ],
            },
            {
                "type": "CURRENCY_AMOUNT",
                "name": "total_due",
                "description": "The final amount due",
            },
        ],
    },
)
Response
{"success":true,"data":{"invoice_number":{"type":"TEXT","value":"INV-2024-0042","confidence":0.97,"citations":["Invoice #INV-2024-0042"],"source":"accounts-payable-invoice.pdf"},"vendor":{"type":"TEXT","value":"Northwind Accounting Services GmbH","confidence":0.98,"citations":["Northwind Accounting Services GmbH"],"source":"accounts-payable-invoice.pdf"},"due_date":{"type":"DATE","value":"2024-04-14","confidence":0.96,"citations":["Due Date: 2024-04-14"],"source":"accounts-payable-invoice.pdf"},"line_items":{"type":"ARRAY","value":[{"description":{"value":"Month-end close automation workshop","confidence":0.98,"citations":["Month-end close automation workshop"]},"amount":{"value":720.00,"confidence":0.96,"citations":["USD 720.00"]}},{"description":{"value":"Invoice schema rollout and testing","confidence":0.97,"citations":["Invoice schema rollout and testing"]},"amount":{"value":480.00,"confidence":0.95,"citations":["USD 480.00"]}},{"description":{"value":"Vendor onboarding playbook update","confidence":0.95,"citations":["Vendor onboarding playbook update"]},"amount":{"value":190.00,"confidence":0.94,"citations":["USD 190.00"]}}],"confidence":0.97,"citations":[],"source":"accounts-payable-invoice.pdf"},"total_due":{"type":"CURRENCY_AMOUNT","value":1390.00,"confidence":0.97,"citations":["Total Due: USD 1,390.00"],"source":"accounts-payable-invoice.pdf"}}}
Request
importil"github.com/iterationlayer/sdk-go"client:=il.NewClient("YOUR_API_KEY")result,err:=client.ExtractDocument(il.ExtractDocumentRequest{Files:[]il.FileInput{il.NewFileFromURL("accounts-payable-invoice.pdf","https://iterationlayer.com/code-samples/accounts-payable-invoice.pdf",),},Schema:il.ExtractionSchema{"invoice_number":il.NewTextFieldConfig("invoice_number","The invoice number",),"vendor":il.NewTextFieldConfig("vendor","The vendor legal name",),"due_date":il.NewDateFieldConfig("due_date","The invoice due date",),"line_items":il.NewArrayFieldConfig("line_items","Line items",[]il.FieldConfig{il.NewTextFieldConfig("description","Line item description"),il.NewCurrencyAmountFieldConfig("amount","Line item amount"),},),"total_due":il.NewCurrencyAmountFieldConfig("total_due","The final amount due",),},})
Response
{"success":true,"data":{"invoice_number":{"type":"TEXT","value":"INV-2024-0042","confidence":0.97,"citations":["Invoice #INV-2024-0042"],"source":"accounts-payable-invoice.pdf"},"vendor":{"type":"TEXT","value":"Northwind Accounting Services GmbH","confidence":0.98,"citations":["Northwind Accounting Services GmbH"],"source":"accounts-payable-invoice.pdf"},"due_date":{"type":"DATE","value":"2024-04-14","confidence":0.96,"citations":["Due Date: 2024-04-14"],"source":"accounts-payable-invoice.pdf"},"line_items":{"type":"ARRAY","value":[{"description":{"value":"Month-end close automation workshop","confidence":0.98,"citations":["Month-end close automation workshop"]},"amount":{"value":720.00,"confidence":0.96,"citations":["USD 720.00"]}},{"description":{"value":"Invoice schema rollout and testing","confidence":0.97,"citations":["Invoice schema rollout and testing"]},"amount":{"value":480.00,"confidence":0.95,"citations":["USD 480.00"]}},{"description":{"value":"Vendor onboarding playbook update","confidence":0.95,"citations":["Vendor onboarding playbook update"]},"amount":{"value":190.00,"confidence":0.94,"citations":["USD 190.00"]}}],"confidence":0.97,"citations":[],"source":"accounts-payable-invoice.pdf"},"total_due":{"type":"CURRENCY_AMOUNT","value":1390.00,"confidence":0.97,"citations":["Total Due: USD 1,390.00"],"source":"accounts-payable-invoice.pdf"}}}
Use the same workflow from code, agents, or n8n
When an automation moves from prototype to production, you should not have to rebuild it for every environment. Iteration Layer lets scripts, agents, and n8n workflows call the same European AI workflow runtime.
Input
40+ file formats
Extraction
Documents, websites,
and markdown
Generation
Documents, images,
and sheets
Output
Structured format
Input
40+ file formats
Extraction
Document, website and
markdown extraction
Generation
Document, image and
sheet generation
Output
Standardized format
Fits into your existing stack
Native SDKs for TypeScript, Python, and Go. OpenAPI spec for everything else. MCP server for AI agents and Claude Code skills. n8n integration for visual workflows.
EU AI workflow runtime
Run document, image, and file steps through one EU-hosted workflow layer with shared API conventions and billing.
Agent-ready by design
Expose the same document and image actions to MCP tools and Claude Code skills, then reuse the API contract when workflows graduate into scripts or automations.
Verified n8n node
Install the verified Iteration Layer node in n8n, then route documents and generated files through the same provider from visual workflows.
Describe the structured data you want returned using our schema format. Each field has a name, a type, and an optional description to guide extraction.
02
Send your documents
Upload any of 40+ file formats including PDFs, scans, Office files, emails, images, public website URLs, and more. Send up to 20 files per request and combine them into one extraction result.
03
Get structured data
Receive typed JSON with extracted fields, confidence scores, and source citations so you can automate downstream workflows and route uncertain results to review.
Intelligent Extraction
The API chooses the right extraction path for your schema and documents, without making your workflow stitch together OCR, prompting, and post-processing logic.
Define typed fields — dates, IBANs, currencies, addresses, nested arrays — and get structured JSON back. No prompt engineering, no output parsing.
Deep Content Understanding
Scanned pages, images, tables, and handwritten notes become structured candidates your workflow can validate, route, and turn into downstream outputs.
Built-In Trust Scores
Every extracted value includes a confidence score and a verbatim source citation from the document. Use confidence as a routing signal and citations as review context, and update other systems only with approved values.
Multi-File Merge
Send up to 20 files per request and get one unified extraction across all of them. Mix formats freely — a PDF invoice, a DOCX contract, a JPEG receipt, and a public website URL in the same call.
40+ File Formats
PDF, DOCX, PPTX, ODT, ODS, XLSX, EPUB, LaTeX, EML, Jupyter notebooks, images, public website URLs, plus text and markup formats like YAML, TOML, RST, and Org — all in the same endpoint.
No Model Training
Your documents are never used to train or improve AI models. This is guaranteed for all plans — not gated behind an enterprise contract.
Real-world pipelines, ready to ship
Each recipe chains multiple APIs into a complete workflow. Pick one, tweak it, and deploy — or use it as a starting point for your own pipeline.
Your data is processed on EU-hosted infrastructure and never stored beyond temporary logs. Zero data retention, GDPR-compliant workflows, and a Data Processing Agreement are available for every customer.
Learn more about our security practices.
EU-hosted core processing
Application and processing infrastructure runs in Europe, with provider-scope ISO 27001 and BSI C5 evidence documented for procurement reviews.
Zero data retention
Customer files and processing results are not stored after the request. Usage logs are retained for 90 days and automatically deleted.
Clear answers for security teams
Give reviewers the answers they need up front: where files are processed, what is retained, which subprocessors are involved, and how AI inputs, outputs, review gates, and audit records move through each workflow.
Our OCR benchmark shows strong extraction accuracy, reliability, and performance across 41 real workflow files, including forms, invoices, scans, tables, charts, and photos.
What file formats are supported?
The API accepts 40+ file formats including PDF, DOCX, PPTX, ODT, ODS, XLSX, EPUB, CSV, TSV, HTML, LaTeX, EML, Jupyter notebooks, and all common image formats. Scanned documents are processed with built-in OCR.
How does schema-based extraction work?
You define the structured data schema you want returned by describing each field with a name, type, and optional description. The API then extracts those fields from the document and returns them as typed JSON.
You can send up to 20 files per request. All files are combined into a single extraction result — the API pulls fields from across all documents. The total size limit is 200 MB with 50 MB per file.
Does it handle scanned documents?
Yes. The API includes built-in OCR for scanned documents and images. No separate OCR step is needed.
What happens when a field isn't found?
Missing fields return null with a confidence score of 0. You can use confidence thresholds to decide when to flag documents for manual review.
Is Document Extraction GDPR-compliant?
All document extraction runs on EU infrastructure with zero data retention. Your data is processed in memory and discarded after the request. For the architecture details, see GDPR-compliant document processing patterns.
Build your first workflow in minutes
Chain our APIs into a workflow you can test with your own data during the 7-day trial.