Iteration Layer
Menu

Extract Academic Paper Metadata

Extract title, authors, abstract, and citation info from academic papers.

Who this is for

Research teams and academic platforms use this recipe to extract metadata from academic papers. Submit a paper as a PDF and receive structured JSON with its title, authors, abstract, publication date, and keywords — ready for indexing, citation analysis, or a literature review tool.

Request (cURL)
# POST the paper's URL and the extraction schema to the document-extraction
# endpoint as a JSON body. YOUR_API_KEY is a placeholder for a real API key.
curl https://api.iterationlayer.com/document-extraction/v1/extract \
  --request POST \
  --header "Authorization: Bearer YOUR_API_KEY" \
  --header "Content-Type: application/json" \
  --data '{
    "files": [
      {
        "type": "url",
        "name": "paper.pdf",
        "url": "https://example.com/papers/research-paper.pdf"
      }
    ],
    "schema": {
      "fields": [
        {
          "name": "title",
          "type": "TEXT",
          "description": "Title of the academic paper"
        },
        {
          "name": "authors",
          "type": "ARRAY",
          "description": "List of paper authors",
          "fields": [
            {
              "name": "name",
              "type": "TEXT",
              "description": "Full name of the author"
            }
          ]
        },
        {
          "name": "abstract",
          "type": "TEXTAREA",
          "description": "Paper abstract"
        },
        {
          "name": "published_date",
          "type": "DATE",
          "description": "Publication date of the paper"
        },
        {
          "name": "keywords",
          "type": "ARRAY",
          "description": "Subject keywords or tags",
          "fields": [
            {
              "name": "keyword",
              "type": "TEXT",
              "description": "A keyword or topic tag"
            }
          ]
        }
      ]
    }
  }'
Response
{
  "success": true,
  "data": {
    "title": {
      "value": "Attention Is All You Need",
      "confidence": 0.99,
      "citations": ["Attention Is All You Need"]
    },
    "authors": {
      "value": [
        {
          "name": {
            "value": "Ashish Vaswani",
            "confidence": 0.98,
            "citations": ["Ashish Vaswani"]
          }
        },
        {
          "name": {
            "value": "Noam Shazeer",
            "confidence": 0.97,
            "citations": ["Noam Shazeer"]
          }
        }
      ],
      "confidence": 0.97,
      "citations": []
    },
    "abstract": {
      "value": "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms.",
      "confidence": 0.95,
      "citations": [
        "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks"
      ]
    },
    "published_date": {
      "value": "2017-06-12",
      "confidence": 0.94,
      "citations": ["12 Jun 2017"]
    },
    "keywords": {
      "value": [
        {
          "keyword": {
            "value": "transformer",
            "confidence": 0.96,
            "citations": ["Transformer"]
          }
        },
        {
          "keyword": {
            "value": "attention mechanism",
            "confidence": 0.95,
            "citations": ["attention mechanisms"]
          }
        }
      ],
      "confidence": 0.95,
      "citations": []
    }
  }
}
Request (JavaScript)
import { IterationLayer } from "iterationlayer";
// Create the SDK client; YOUR_API_KEY is a placeholder for a real API key.
const client = new IterationLayer({ apiKey: "YOUR_API_KEY" });

// The paper to process, referenced by URL.
const paperFile = {
  type: "url",
  name: "paper.pdf",
  url: "https://example.com/papers/research-paper.pdf",
};

// Sub-fields describing one entry of each ARRAY field.
const authorFields = [
  { name: "name", type: "TEXT", description: "Full name of the author" },
];
const keywordFields = [
  { name: "keyword", type: "TEXT", description: "A keyword or topic tag" },
];

// Extraction schema: one entry per piece of metadata to pull from the paper.
const schema = {
  fields: [
    { name: "title", type: "TEXT", description: "Title of the academic paper" },
    {
      name: "authors",
      type: "ARRAY",
      description: "List of paper authors",
      fields: authorFields,
    },
    { name: "abstract", type: "TEXTAREA", description: "Paper abstract" },
    {
      name: "published_date",
      type: "DATE",
      description: "Publication date of the paper",
    },
    {
      name: "keywords",
      type: "ARRAY",
      description: "Subject keywords or tags",
      fields: keywordFields,
    },
  ],
};

const result = await client.extractDocument({ files: [paperFile], schema });
Response
{
  "success": true,
  "data": {
    "title": {
      "value": "Attention Is All You Need",
      "confidence": 0.99,
      "citations": ["Attention Is All You Need"]
    },
    "authors": {
      "value": [
        {
          "name": {
            "value": "Ashish Vaswani",
            "confidence": 0.98,
            "citations": ["Ashish Vaswani"]
          }
        },
        {
          "name": {
            "value": "Noam Shazeer",
            "confidence": 0.97,
            "citations": ["Noam Shazeer"]
          }
        }
      ],
      "confidence": 0.97,
      "citations": []
    },
    "abstract": {
      "value": "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms.",
      "confidence": 0.95,
      "citations": [
        "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks"
      ]
    },
    "published_date": {
      "value": "2017-06-12",
      "confidence": 0.94,
      "citations": ["12 Jun 2017"]
    },
    "keywords": {
      "value": [
        {
          "keyword": {
            "value": "transformer",
            "confidence": 0.96,
            "citations": ["Transformer"]
          }
        },
        {
          "keyword": {
            "value": "attention mechanism",
            "confidence": 0.95,
            "citations": ["attention mechanisms"]
          }
        }
      ],
      "confidence": 0.95,
      "citations": []
    }
  }
}
Request (Python)
from iterationlayer import IterationLayer
# Create the SDK client; YOUR_API_KEY is a placeholder for a real API key.
client = IterationLayer(api_key="YOUR_API_KEY")

# The paper to process, referenced by URL.
paper_file = {
    "type": "url",
    "name": "paper.pdf",
    "url": "https://example.com/papers/research-paper.pdf",
}

# Sub-fields describing one entry of each ARRAY field.
author_fields = [
    {"name": "name", "type": "TEXT", "description": "Full name of the author"},
]
keyword_fields = [
    {"name": "keyword", "type": "TEXT", "description": "A keyword or topic tag"},
]

# Extraction schema: one entry per piece of metadata to pull from the paper.
schema = {
    "fields": [
        {
            "name": "title",
            "type": "TEXT",
            "description": "Title of the academic paper",
        },
        {
            "name": "authors",
            "type": "ARRAY",
            "description": "List of paper authors",
            "fields": author_fields,
        },
        {
            "name": "abstract",
            "type": "TEXTAREA",
            "description": "Paper abstract",
        },
        {
            "name": "published_date",
            "type": "DATE",
            "description": "Publication date of the paper",
        },
        {
            "name": "keywords",
            "type": "ARRAY",
            "description": "Subject keywords or tags",
            "fields": keyword_fields,
        },
    ]
}

result = client.extract_document(files=[paper_file], schema=schema)
Response
{
  "success": true,
  "data": {
    "title": {
      "value": "Attention Is All You Need",
      "confidence": 0.99,
      "citations": ["Attention Is All You Need"]
    },
    "authors": {
      "value": [
        {
          "name": {
            "value": "Ashish Vaswani",
            "confidence": 0.98,
            "citations": ["Ashish Vaswani"]
          }
        },
        {
          "name": {
            "value": "Noam Shazeer",
            "confidence": 0.97,
            "citations": ["Noam Shazeer"]
          }
        }
      ],
      "confidence": 0.97,
      "citations": []
    },
    "abstract": {
      "value": "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms.",
      "confidence": 0.95,
      "citations": [
        "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks"
      ]
    },
    "published_date": {
      "value": "2017-06-12",
      "confidence": 0.94,
      "citations": ["12 Jun 2017"]
    },
    "keywords": {
      "value": [
        {
          "keyword": {
            "value": "transformer",
            "confidence": 0.96,
            "citations": ["Transformer"]
          }
        },
        {
          "keyword": {
            "value": "attention mechanism",
            "confidence": 0.95,
            "citations": ["attention mechanisms"]
          }
        }
      ],
      "confidence": 0.95,
      "citations": []
    }
  }
}
Request (Go)
package main

import il "github.com/iterationlayer/sdk-go"

// main builds a document-extraction request for a paper referenced by URL,
// sends it through the SDK client, and panics if the call returns an error.
func main() {
    // YOUR_API_KEY is a placeholder for a real API key.
    client := il.NewClient("YOUR_API_KEY")

    // The paper to process, referenced by URL.
    files := []il.FileInput{
        {
            Type: "url",
            Name: "paper.pdf",
            Url:  "https://example.com/papers/research-paper.pdf",
        },
    }

    // Sub-fields describing one entry of each ARRAY field.
    authorFields := []any{
        il.TextFieldConfig{
            Name:        "name",
            Type:        "TEXT",
            Description: "Full name of the author",
        },
    }
    keywordFields := []any{
        il.TextFieldConfig{
            Name:        "keyword",
            Type:        "TEXT",
            Description: "A keyword or topic tag",
        },
    }

    // Extraction schema: one entry per piece of metadata to pull from the paper.
    schema := il.ExtractionSchema{
        Fields: []any{
            il.TextFieldConfig{
                Name:        "title",
                Type:        "TEXT",
                Description: "Title of the academic paper",
            },
            il.ArrayFieldConfig{
                Name:        "authors",
                Type:        "ARRAY",
                Description: "List of paper authors",
                Fields:      authorFields,
            },
            il.TextareaFieldConfig{
                Name:        "abstract",
                Type:        "TEXTAREA",
                Description: "Paper abstract",
            },
            il.DateFieldConfig{
                Name:        "published_date",
                Type:        "DATE",
                Description: "Publication date of the paper",
            },
            il.ArrayFieldConfig{
                Name:        "keywords",
                Type:        "ARRAY",
                Description: "Subject keywords or tags",
                Fields:      keywordFields,
            },
        },
    }

    result, err := client.ExtractDocument(il.ExtractDocumentRequest{
        Files:  files,
        Schema: schema,
    })
    if err != nil {
        panic(err)
    }

    // The example does not use the result; the blank assignment keeps the
    // compiler from rejecting it as an unused variable.
    _ = result
}
Response
{
  "success": true,
  "data": {
    "title": {
      "value": "Attention Is All You Need",
      "confidence": 0.99,
      "citations": ["Attention Is All You Need"]
    },
    "authors": {
      "value": [
        {
          "name": {
            "value": "Ashish Vaswani",
            "confidence": 0.98,
            "citations": ["Ashish Vaswani"]
          }
        },
        {
          "name": {
            "value": "Noam Shazeer",
            "confidence": 0.97,
            "citations": ["Noam Shazeer"]
          }
        }
      ],
      "confidence": 0.97,
      "citations": []
    },
    "abstract": {
      "value": "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms.",
      "confidence": 0.95,
      "citations": [
        "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks"
      ]
    },
    "published_date": {
      "value": "2017-06-12",
      "confidence": 0.94,
      "citations": ["12 Jun 2017"]
    },
    "keywords": {
      "value": [
        {
          "keyword": {
            "value": "transformer",
            "confidence": 0.96,
            "citations": ["Transformer"]
          }
        },
        {
          "keyword": {
            "value": "attention mechanism",
            "confidence": 0.95,
            "citations": ["attention mechanisms"]
          }
        }
      ],
      "confidence": 0.95,
      "citations": []
    }
  }
}
Template
{
  "name": "Extract Academic Paper Metadata",
  "nodes": [
    {
      "parameters": {
        "content": "## Extract Academic Paper Metadata\n\nResearch teams and academic platforms use this recipe to extract metadata from a paper. Upload a PDF paper and receive structured JSON with title, authors, abstract, publication date, and keywords \u2014 ready for indexing, citation analysis, or a literature review tool.\n\n**Note:** This workflow uses the Iteration Layer community node (`n8n-nodes-iterationlayer`). Install it via Settings > Community Nodes on self-hosted n8n, or add it directly on n8n Cloud with Verified Community Nodes enabled.",
        "height": 280,
        "width": 500,
        "color": 2
      },
      "type": "n8n-nodes-base.stickyNote",
      "typeVersion": 1,
      "position": [
        200,
        40
      ],
      "id": "2e5849ad-7140-4898-9adc-b818ae2d3de8",
      "name": "Overview"
    },
    {
      "parameters": {
        "content": "### Step 1: Extract Data\nResource: **Document Extraction**\n\nConfigure the Document Extraction parameters below, then connect your credentials.",
        "height": 160,
        "width": 300,
        "color": 6
      },
      "type": "n8n-nodes-base.stickyNote",
      "typeVersion": 1,
      "position": [
        475,
        100
      ],
      "id": "0895d89d-fcc2-409b-a989-35d4344b4b0d",
      "name": "Step 1 Note"
    },
    {
      "parameters": {},
      "type": "n8n-nodes-base.manualTrigger",
      "typeVersion": 1,
      "position": [
        250,
        300
      ],
      "id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
      "name": "Manual Trigger"
    },
    {
      "parameters": {
        "resource": "documentExtraction",
        "schemaInputMode": "rawJson",
        "schemaJson": "{\"fields\":[{\"name\":\"title\",\"type\":\"TEXT\",\"description\":\"Title of the academic paper\"},{\"name\":\"authors\",\"type\":\"ARRAY\",\"description\":\"List of paper authors\",\"fields\":[{\"name\":\"name\",\"type\":\"TEXT\",\"description\":\"Full name of the author\"}]},{\"name\":\"abstract\",\"type\":\"TEXTAREA\",\"description\":\"Paper abstract\"},{\"name\":\"published_date\",\"type\":\"DATE\",\"description\":\"Publication date of the paper\"},{\"name\":\"keywords\",\"type\":\"ARRAY\",\"description\":\"Subject keywords or tags\",\"fields\":[{\"name\":\"keyword\",\"type\":\"TEXT\",\"description\":\"A keyword or topic tag\"}]}]}",
        "files": {
          "fileValues": [
            {
              "fileInputMode": "url",
              "fileName": "paper.pdf",
              "fileUrl": "https://example.com/papers/research-paper.pdf"
            }
          ]
        }
      },
      "type": "n8n-nodes-iterationlayer.iterationLayer",
      "typeVersion": 1,
      "position": [
        500,
        300
      ],
      "id": "b2c3d4e5-f6a7-8901-bcde-f12345678901",
      "name": "Extract Data",
      "credentials": {
        "iterationLayerApi": {
          "id": "1",
          "name": "Iteration Layer API"
        }
      }
    }
  ],
  "connections": {
    "Manual Trigger": {
      "main": [
        [
          {
            "node": "Extract Data",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "settings": {
    "executionOrder": "v1"
  }
}
Prompt
Extract metadata from the academic paper at [file URL]. Use the extract_document tool with these fields:

- title (TEXT): Title of the academic paper
- authors (ARRAY): List of paper authors; each entry has name (TEXT): Full name of the author
- abstract (TEXTAREA): Paper abstract
- published_date (DATE): Publication date of the paper
- keywords (ARRAY): Subject keywords or tags; each entry has keyword (TEXT): A keyword or topic tag

Related Recipes

Try with your own data

Start the 7-day trial and run this recipe in minutes.