Digitize Academic Papers

Extract titles, authors, abstracts, publication dates, and keywords from academic papers into structured JSON for research workflows.

Who this is for

Research teams and academic platforms use this recipe to digitize paper metadata at scale. Submit a PDF paper (the examples below reference it by URL) and receive structured JSON with the title, authors, abstract, publication date, and keywords — ready for indexing, citation analysis, or literature review tools.

# Extraction request body: one PDF referenced by URL, plus the schema of
# fields to pull out of it (title, authors, abstract, published_date, keywords).
payload='{
    "files": [
      {
        "type": "url",
        "name": "paper.pdf",
        "url": "https://example.com/papers/research-paper.pdf"
      }
    ],
    "schema": {
      "fields": [
        { "name": "title", "type": "TEXT", "description": "Title of the academic paper" },
        { "name": "authors", "type": "ARRAY", "description": "List of paper authors", "fields": [
          { "name": "name", "type": "TEXT", "description": "Full name of the author" }
        ]},
        { "name": "abstract", "type": "TEXTAREA", "description": "Paper abstract" },
        { "name": "published_date", "type": "DATE", "description": "Publication date of the paper" },
        { "name": "keywords", "type": "ARRAY", "description": "Subject keywords or tags", "fields": [
          { "name": "keyword", "type": "TEXT", "description": "A keyword or topic tag" }
        ]}
      ]
    }
  }'

# POST the JSON body to the extraction endpoint with bearer authentication.
# Replace YOUR_API_KEY with a real key before running.
curl --request POST \
  --header "Authorization: Bearer YOUR_API_KEY" \
  --header "Content-Type: application/json" \
  --data "$payload" \
  https://api.iterationlayer.com/document-extraction/v1/extract
import { IterationLayer } from "iterationlayer";
// Node.js (ES module) version of the same extraction request.
// Replace YOUR_API_KEY with a real key before running.
const client = new IterationLayer({ apiKey: "YOUR_API_KEY" });

// The paper to process, referenced by URL.
const paperFile = {
  type: "url",
  name: "paper.pdf",
  url: "https://example.com/papers/research-paper.pdf",
};

// Nested field lists attached to the two ARRAY fields below.
const authorFields = [
  { name: "name", type: "TEXT", description: "Full name of the author" },
];
const keywordFields = [
  { name: "keyword", type: "TEXT", description: "A keyword or topic tag" },
];

// Fields requested from the document, in the order they are declared.
const paperSchema = {
  fields: [
    { name: "title", type: "TEXT", description: "Title of the academic paper" },
    { name: "authors", type: "ARRAY", description: "List of paper authors", fields: authorFields },
    { name: "abstract", type: "TEXTAREA", description: "Paper abstract" },
    { name: "published_date", type: "DATE", description: "Publication date of the paper" },
    { name: "keywords", type: "ARRAY", description: "Subject keywords or tags", fields: keywordFields },
  ],
};

// Top-level await: this snippet must run as an ES module.
const result = await client.extract({
  files: [paperFile],
  schema: paperSchema,
});

console.log(result);
from iterationlayer import IterationLayer
# Python version of the same extraction request.
# Replace YOUR_API_KEY with a real key before running.
client = IterationLayer(api_key="YOUR_API_KEY")

# The paper to process, referenced by URL.
paper_file = {
    "type": "url",
    "name": "paper.pdf",
    "url": "https://example.com/papers/research-paper.pdf",
}

# Nested field lists attached to the two ARRAY fields below.
author_fields = [
    {"name": "name", "type": "TEXT", "description": "Full name of the author"},
]
keyword_fields = [
    {"name": "keyword", "type": "TEXT", "description": "A keyword or topic tag"},
]

# Fields requested from the document, in the order they are declared.
paper_schema = {
    "fields": [
        {"name": "title", "type": "TEXT", "description": "Title of the academic paper"},
        {"name": "authors", "type": "ARRAY", "description": "List of paper authors", "fields": author_fields},
        {"name": "abstract", "type": "TEXTAREA", "description": "Paper abstract"},
        {"name": "published_date", "type": "DATE", "description": "Publication date of the paper"},
        {"name": "keywords", "type": "ARRAY", "description": "Subject keywords or tags", "fields": keyword_fields},
    ]
}

result = client.extract(files=[paper_file], schema=paper_schema)

print(result)
package main

import (
    "fmt"
    il "github.com/iterationlayer/sdk-go"
)

// main sends one extraction request for a PDF referenced by URL and prints
// the result. It panics if the client returns an error.
// Replace YOUR_API_KEY with a real key before running.
func main() {
    client := il.NewClient("YOUR_API_KEY")

    // Nested schemas attached to the two array fields below.
    authorFields := il.ExtractionSchema{
        "name": il.NewTextFieldConfig("name", "Full name of the author"),
    }
    keywordFields := il.ExtractionSchema{
        "keyword": il.NewTextFieldConfig("keyword", "A keyword or topic tag"),
    }

    // NOTE(review): "abstract" is built with the text constructor here, while
    // the curl, JavaScript and Python samples declare it as TEXTAREA — confirm
    // whether the Go SDK has a matching constructor.
    paperSchema := il.ExtractionSchema{
        "title":          il.NewTextFieldConfig("title", "Title of the academic paper"),
        "authors":        il.NewArrayFieldConfig("authors", "List of paper authors", authorFields),
        "abstract":       il.NewTextFieldConfig("abstract", "Paper abstract"),
        "published_date": il.NewDateFieldConfig("published_date", "Publication date of the paper"),
        "keywords":       il.NewArrayFieldConfig("keywords", "Subject keywords or tags", keywordFields),
    }

    request := il.ExtractRequest{
        Files: []il.FileInput{
            il.NewFileFromURL("paper.pdf", "https://example.com/papers/research-paper.pdf"),
        },
        Schema: paperSchema,
    }

    result, err := client.Extract(request)
    if err != nil {
        panic(err)
    }

    fmt.Println(result)
}

Related Recipes

Start building in minutes

Free trial credits included. No credit card required.