How to Extract Academic Paper Metadata with the Iteration Layer Document Extraction API - Recipes

Request

# POST the paper URL and the extraction schema as a JSON body to the extract endpoint.
curl --request POST \
  --url https://api.iterationlayer.com/document-extraction/v1/extract \
  --header "Authorization: Bearer YOUR_API_KEY" \
  --header "Content-Type: application/json" \
  --data '{
    "files": [
      {
        "type": "url",
        "name": "paper.pdf",
        "url": "https://example.com/papers/research-paper.pdf"
      }
    ],
    "schema": {
      "fields": [
        {
          "name": "title",
          "type": "TEXT",
          "description": "Title of the academic paper"
        },
        {
          "name": "authors",
          "type": "ARRAY",
          "description": "List of paper authors",
          "fields": [
            {
              "name": "name",
              "type": "TEXT",
              "description": "Full name of the author"
            }
          ]
        },
        {
          "name": "abstract",
          "type": "TEXTAREA",
          "description": "Paper abstract"
        },
        {
          "name": "published_date",
          "type": "DATE",
          "description": "Publication date of the paper"
        },
        {
          "name": "keywords",
          "type": "ARRAY",
          "description": "Subject keywords or tags",
          "fields": [
            {
              "name": "keyword",
              "type": "TEXT",
              "description": "A keyword or topic tag"
            }
          ]
        }
      ]
    }
  }'

Response

{
  "success": true,
  "data": {
    "title": {
      "value": "Attention Is All You Need",
      "confidence": 0.99,
      "citations": ["Attention Is All You Need"]
    },
    "authors": {
      "value": [
        {
          "name": {
            "value": "Ashish Vaswani",
            "confidence": 0.98,
            "citations": ["Ashish Vaswani"]
          }
        },
        {
          "name": {
            "value": "Noam Shazeer",
            "confidence": 0.97,
            "citations": ["Noam Shazeer"]
          }
        }
      ],
      "confidence": 0.97,
      "citations": []
    },
    "abstract": {
      "value": "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms.",
      "confidence": 0.95,
      "citations": [
        "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks"
      ]
    },
    "published_date": {
      "value": "2017-06-12",
      "confidence": 0.94,
      "citations": ["12 Jun 2017"]
    },
    "keywords": {
      "value": [
        {
          "keyword": {
            "value": "transformer",
            "confidence": 0.96,
            "citations": ["Transformer"]
          }
        },
        {
          "keyword": {
            "value": "attention mechanism",
            "confidence": 0.95,
            "citations": ["attention mechanisms"]
          }
        }
      ],
      "confidence": 0.95,
      "citations": []
    }
  }
}

Request

import { IterationLayer } from "iterationlayer";
// Create the SDK client with your API key.
const client = new IterationLayer({ apiKey: "YOUR_API_KEY" });

// Request extraction for one PDF referenced by URL. The schema lists the
// fields to extract: title, authors, abstract, published_date and keywords.
const result = await client.extractDocument({
  // Input documents; this example passes a single file by URL.
  files: [
    {
      type: "url",
      name: "paper.pdf",
      url: "https://example.com/papers/research-paper.pdf",
    },
  ],
  schema: {
    fields: [
      {
        name: "title",
        type: "TEXT",
        description: "Title of the academic paper",
      },
      // ARRAY fields declare the shape of each item through a nested `fields` list.
      {
        name: "authors",
        type: "ARRAY",
        description: "List of paper authors",
        fields: [
          {
            name: "name",
            type: "TEXT",
            description: "Full name of the author",
          },
        ],
      },
      {
        name: "abstract",
        type: "TEXTAREA",
        description: "Paper abstract",
      },
      {
        name: "published_date",
        type: "DATE",
        description: "Publication date of the paper",
      },
      // Same nested pattern as `authors`: each item has a single `keyword` text field.
      {
        name: "keywords",
        type: "ARRAY",
        description: "Subject keywords or tags",
        fields: [
          {
            name: "keyword",
            type: "TEXT",
            description: "A keyword or topic tag",
          },
        ],
      },
    ],
  },
});

Response

{
  "success": true,
  "data": {
    "title": {
      "value": "Attention Is All You Need",
      "confidence": 0.99,
      "citations": ["Attention Is All You Need"]
    },
    "authors": {
      "value": [
        {
          "name": {
            "value": "Ashish Vaswani",
            "confidence": 0.98,
            "citations": ["Ashish Vaswani"]
          }
        },
        {
          "name": {
            "value": "Noam Shazeer",
            "confidence": 0.97,
            "citations": ["Noam Shazeer"]
          }
        }
      ],
      "confidence": 0.97,
      "citations": []
    },
    "abstract": {
      "value": "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms.",
      "confidence": 0.95,
      "citations": [
        "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks"
      ]
    },
    "published_date": {
      "value": "2017-06-12",
      "confidence": 0.94,
      "citations": ["12 Jun 2017"]
    },
    "keywords": {
      "value": [
        {
          "keyword": {
            "value": "transformer",
            "confidence": 0.96,
            "citations": ["Transformer"]
          }
        },
        {
          "keyword": {
            "value": "attention mechanism",
            "confidence": 0.95,
            "citations": ["attention mechanisms"]
          }
        }
      ],
      "confidence": 0.95,
      "citations": []
    }
  }
}

Request

from iterationlayer import IterationLayer
# Create the SDK client with your API key.
client = IterationLayer(api_key="YOUR_API_KEY")

# The document to process, referenced by URL.
paper_file = {
    "type": "url",
    "name": "paper.pdf",
    "url": "https://example.com/papers/research-paper.pdf",
}

# One dict per field to extract; ARRAY fields describe their items via a nested "fields" list.
title_field = {
    "name": "title",
    "type": "TEXT",
    "description": "Title of the academic paper",
}
authors_field = {
    "name": "authors",
    "type": "ARRAY",
    "description": "List of paper authors",
    "fields": [
        {"name": "name", "type": "TEXT", "description": "Full name of the author"},
    ],
}
abstract_field = {
    "name": "abstract",
    "type": "TEXTAREA",
    "description": "Paper abstract",
}
published_date_field = {
    "name": "published_date",
    "type": "DATE",
    "description": "Publication date of the paper",
}
keywords_field = {
    "name": "keywords",
    "type": "ARRAY",
    "description": "Subject keywords or tags",
    "fields": [
        {"name": "keyword", "type": "TEXT", "description": "A keyword or topic tag"},
    ],
}

# Send the file and the schema (fields in the same order as declared above).
result = client.extract_document(
    files=[paper_file],
    schema={
        "fields": [
            title_field,
            authors_field,
            abstract_field,
            published_date_field,
            keywords_field,
        ]
    },
)

Response

{
  "success": true,
  "data": {
    "title": {
      "value": "Attention Is All You Need",
      "confidence": 0.99,
      "citations": ["Attention Is All You Need"]
    },
    "authors": {
      "value": [
        {
          "name": {
            "value": "Ashish Vaswani",
            "confidence": 0.98,
            "citations": ["Ashish Vaswani"]
          }
        },
        {
          "name": {
            "value": "Noam Shazeer",
            "confidence": 0.97,
            "citations": ["Noam Shazeer"]
          }
        }
      ],
      "confidence": 0.97,
      "citations": []
    },
    "abstract": {
      "value": "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms.",
      "confidence": 0.95,
      "citations": [
        "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks"
      ]
    },
    "published_date": {
      "value": "2017-06-12",
      "confidence": 0.94,
      "citations": ["12 Jun 2017"]
    },
    "keywords": {
      "value": [
        {
          "keyword": {
            "value": "transformer",
            "confidence": 0.96,
            "citations": ["Transformer"]
          }
        },
        {
          "keyword": {
            "value": "attention mechanism",
            "confidence": 0.95,
            "citations": ["attention mechanisms"]
          }
        }
      ],
      "confidence": 0.95,
      "citations": []
    }
  }
}

Request

package main

import il "github.com/iterationlayer/sdk-go"

// main requests extraction of title, authors, abstract, publication date and
// keywords from a PDF referenced by URL, and panics if the call returns an error.
func main() {
    client := il.NewClient("YOUR_API_KEY")

    result, err := client.ExtractDocument(il.ExtractDocumentRequest{
        // The element type is implied by the slice type, so it is not repeated.
        Files: []il.FileInput{
            {
                Type: "url",
                Name: "paper.pdf",
                Url: "https://example.com/papers/research-paper.pdf",
            },
        },
        Schema: il.ExtractionSchema{
            // The field configs are different struct types, hence the []any slice.
            Fields: []any{
                il.TextFieldConfig{
                    Name: "title",
                    Type: "TEXT",
                    Description: "Title of the academic paper",
                },
                // ARRAY fields describe their items through a nested Fields list.
                il.ArrayFieldConfig{
                    Name: "authors",
                    Type: "ARRAY",
                    Description: "List of paper authors",
                    Fields: []any{
                        il.TextFieldConfig{
                            Name: "name",
                            Type: "TEXT",
                            Description: "Full name of the author",
                        },
                    },
                },
                il.TextareaFieldConfig{
                    Name: "abstract",
                    Type: "TEXTAREA",
                    Description: "Paper abstract",
                },
                il.DateFieldConfig{
                    Name: "published_date",
                    Type: "DATE",
                    Description: "Publication date of the paper",
                },
                il.ArrayFieldConfig{
                    Name: "keywords",
                    Type: "ARRAY",
                    Description: "Subject keywords or tags",
                    Fields: []any{
                        il.TextFieldConfig{
                            Name: "keyword",
                            Type: "TEXT",
                            Description: "A keyword or topic tag",
                        },
                    },
                },
            },
        },
    })
    if err != nil {
        panic(err)
    }

    // The snippet does not use the result; the blank assignment avoids the
    // compiler's "declared and not used" error.
    _ = result
}

Response

{
  "success": true,
  "data": {
    "title": {
      "value": "Attention Is All You Need",
      "confidence": 0.99,
      "citations": ["Attention Is All You Need"]
    },
    "authors": {
      "value": [
        {
          "name": {
            "value": "Ashish Vaswani",
            "confidence": 0.98,
            "citations": ["Ashish Vaswani"]
          }
        },
        {
          "name": {
            "value": "Noam Shazeer",
            "confidence": 0.97,
            "citations": ["Noam Shazeer"]
          }
        }
      ],
      "confidence": 0.97,
      "citations": []
    },
    "abstract": {
      "value": "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms.",
      "confidence": 0.95,
      "citations": [
        "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks"
      ]
    },
    "published_date": {
      "value": "2017-06-12",
      "confidence": 0.94,
      "citations": ["12 Jun 2017"]
    },
    "keywords": {
      "value": [
        {
          "keyword": {
            "value": "transformer",
            "confidence": 0.96,
            "citations": ["Transformer"]
          }
        },
        {
          "keyword": {
            "value": "attention mechanism",
            "confidence": 0.95,
            "citations": ["attention mechanisms"]
          }
        }
      ],
      "confidence": 0.95,
      "citations": []
    }
  }
}

Template

{
  "name": "Extract Academic Paper Metadata",
  "nodes": [
    {
      "parameters": {
        "content": "## Extract Academic Paper Metadata\n\nResearch teams and academic platforms use this recipe to extract metadata from a paper. Upload a PDF paper and receive structured JSON with title, authors, abstract, publication date, and keywords \u2014 ready for indexing, citation analysis, or a literature review tool.\n\n**Note:** This workflow uses the Iteration Layer community node (`n8n-nodes-iterationlayer`). Install it via Settings > Community Nodes on self-hosted n8n, or add it directly on n8n Cloud with Verified Community Nodes enabled.",
        "height": 280,
        "width": 500,
        "color": 2
      },
      "type": "n8n-nodes-base.stickyNote",
      "typeVersion": 1,
      "position": [
        200,
        40
      ],
      "id": "2e5849ad-7140-4898-9adc-b818ae2d3de8",
      "name": "Overview"
    },
    {
      "parameters": {
        "content": "### Step 1: Extract Data\nResource: **Document Extraction**\n\nConfigure the Document Extraction parameters below, then connect your credentials.",
        "height": 160,
        "width": 300,
        "color": 6
      },
      "type": "n8n-nodes-base.stickyNote",
      "typeVersion": 1,
      "position": [
        475,
        100
      ],
      "id": "0895d89d-fcc2-409b-a989-35d4344b4b0d",
      "name": "Step 1 Note"
    },
    {
      "parameters": {},
      "type": "n8n-nodes-base.manualTrigger",
      "typeVersion": 1,
      "position": [
        250,
        300
      ],
      "id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
      "name": "Manual Trigger"
    },
    {
      "parameters": {
        "resource": "documentExtraction",
        "schemaInputMode": "rawJson",
        "schemaJson": "{\"fields\":[{\"name\":\"title\",\"type\":\"TEXT\",\"description\":\"Title of the academic paper\"},{\"name\":\"authors\",\"type\":\"ARRAY\",\"description\":\"List of paper authors\",\"fields\":[{\"name\":\"name\",\"type\":\"TEXT\",\"description\":\"Full name of the author\"}]},{\"name\":\"abstract\",\"type\":\"TEXTAREA\",\"description\":\"Paper abstract\"},{\"name\":\"published_date\",\"type\":\"DATE\",\"description\":\"Publication date of the paper\"},{\"name\":\"keywords\",\"type\":\"ARRAY\",\"description\":\"Subject keywords or tags\",\"fields\":[{\"name\":\"keyword\",\"type\":\"TEXT\",\"description\":\"A keyword or topic tag\"}]}]}",
        "files": {
          "fileValues": [
            {
              "fileInputMode": "url",
              "fileName": "paper.pdf",
              "fileUrl": "https://example.com/papers/research-paper.pdf"
            }
          ]
        }
      },
      "type": "n8n-nodes-iterationlayer.iterationLayer",
      "typeVersion": 1,
      "position": [
        500,
        300
      ],
      "id": "b2c3d4e5-f6a7-8901-bcde-f12345678901",
      "name": "Extract Data",
      "credentials": {
        "iterationLayerApi": {
          "id": "1",
          "name": "Iteration Layer API"
        }
      }
    }
  ],
  "connections": {
    "Manual Trigger": {
      "main": [
        [
          {
            "node": "Extract Data",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "settings": {
    "executionOrder": "v1"
  }
}

Prompt

Extract metadata from the academic paper at [file URL]. Use the extract_document tool with these fields:

- title (TEXT): Title of the academic paper
- authors (ARRAY): List of paper authors, each with name (TEXT): Full name of the author
- abstract (TEXTAREA): Paper abstract
- published_date (DATE): Publication date of the paper
- keywords (ARRAY): Subject keywords or tags, each with keyword (TEXT): A keyword or topic tag

Ingest

Generate

Integrations

Built for

By product

By industry

Overview

APIs

Integrations

Billing

Benchmarks

Blog

More

Extract Academic Paper Metadata

Who this is for

Related Recipes

Extract 1098 Data

Extract 1099-MISC Data

Extract 1099-NEC Data

Try with your own data

Document Extraction