Content Aggregation Pipeline

Parse documents from multiple sources and transform embedded images for a unified content feed.

Who this is for

Content platforms and news aggregators use this pipeline to ingest articles from various sources. Extract the article title, body, and image URL from a document, then optimize the image for web display — ready for your content feed.

# Step 1: Extract article content from document
EXTRACTION=$(curl -s -X POST https://api.iterationlayer.com/document-extraction/v1/extract \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "files": [
      {
        "type": "url",
        "name": "article.pdf",
        "url": "https://example.com/articles/feature-story.pdf"
      }
    ],
    "schema": {
      "fields": [
        { "name": "title", "type": "TEXT", "description": "Article headline" },
        { "name": "body", "type": "TEXTAREA", "description": "Full article body text" },
        { "name": "image_url", "type": "TEXT", "description": "URL of the embedded article image" }
      ]
    }
  }')

# jq -r prints the literal string "null" when the field is missing; the
# `// empty` alternative collapses both null and missing to an empty string
# so the guard below catches every "no image" case before we call Step 2.
IMAGE_URL=$(echo "$EXTRACTION" | jq -r '.results[0].fields.image_url // empty')

if [ -z "$IMAGE_URL" ]; then
  echo "Extraction did not return an image_url; skipping image transform." >&2
  exit 1
fi

# Step 2: Optimize the extracted image for web display
curl -X POST https://api.iterationlayer.com/image-transformation/v1/transform \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d "{
    \"file\": {
      \"type\": \"url\",
      \"name\": \"article-image.jpg\",
      \"url\": \"$IMAGE_URL\"
    },
    \"operations\": [
      { \"type\": \"resize\", \"width_in_px\": 800, \"height_in_px\": 600, \"fit\": \"cover\" },
      { \"type\": \"auto_contrast\" },
      { \"type\": \"convert\", \"format\": \"webp\", \"quality\": 80 }
    ]
  }"
import { IterationLayer } from "iterationlayer";

const client = new IterationLayer({ apiKey: "YOUR_API_KEY" });

// Step 1: Extract article content from document
const extraction = await client.extract({
  files: [
    {
      type: "url",
      name: "article.pdf",
      url: "https://example.com/articles/feature-story.pdf",
    },
  ],
  schema: {
    fields: [
      { name: "title", type: "TEXT", description: "Article headline" },
      { name: "body", type: "TEXTAREA", description: "Full article body text" },
      { name: "image_url", type: "TEXT", description: "URL of the embedded article image" },
    ],
  },
});

// Guard: a document with no embedded image (or an empty result set) would
// otherwise send `url: undefined` to the transform API. Fail early instead.
const imageUrl = extraction.results?.[0]?.fields?.image_url;
if (!imageUrl) {
  throw new Error("Extraction did not return an image_url field");
}

// Step 2: Optimize the extracted image for web display
const imageResult = await client.transform({
  file: {
    type: "url",
    name: "article-image.jpg",
    url: imageUrl,
  },
  operations: [
    { type: "resize", width_in_px: 800, height_in_px: 600, fit: "cover" },
    { type: "auto_contrast" },
    { type: "convert", format: "webp", quality: 80 },
  ],
});

// The API returns the transformed image as a base64 string; decode it into
// a Buffer ready to write to disk or serve in the content feed.
const imageBuffer = Buffer.from(imageResult.data.buffer, "base64");
import base64

from iterationlayer import IterationLayer

client = IterationLayer(api_key="YOUR_API_KEY")

# Step 1: Extract article content from document
extraction = client.extract(
    files=[
        {
            "type": "url",
            "name": "article.pdf",
            "url": "https://example.com/articles/feature-story.pdf",
        }
    ],
    schema={
        "fields": [
            {"name": "title", "type": "TEXT", "description": "Article headline"},
            {"name": "body", "type": "TEXTAREA", "description": "Full article body text"},
            {"name": "image_url", "type": "TEXT", "description": "URL of the embedded article image"},
        ]
    },
)

# Guard: indexing blindly raises an opaque IndexError/KeyError, and a
# document with no embedded image would pass an empty URL to the transform
# API. Fail early with clear messages instead.
results = extraction["results"]
if not results:
    raise ValueError("Extraction returned no results")
image_url = results[0]["fields"].get("image_url")
if not image_url:
    raise ValueError("Extraction did not return an image_url field")

# Step 2: Optimize the extracted image for web display
image_result = client.transform(
    file={
        "type": "url",
        "name": "article-image.jpg",
        "url": image_url,
    },
    operations=[
        {"type": "resize", "width_in_px": 800, "height_in_px": 600, "fit": "cover"},
        {"type": "auto_contrast"},
        {"type": "convert", "format": "webp", "quality": 80},
    ],
)

# The API returns the transformed image as base64; decode and persist it.
with open("optimized.webp", "wb") as f:
    f.write(base64.b64decode(image_result["data"]["buffer"]))
package main

import (
	"log"

	il "github.com/iterationlayer/sdk-go"
)

client := il.NewClient("YOUR_API_KEY")

// Step 1: Extract article content from document
extraction, err := client.Extract(il.ExtractRequest{
    Files: []il.FileInput{
        il.NewFileFromURL("article.pdf", "https://example.com/articles/feature-story.pdf"),
    },
    Schema: il.ExtractionSchema{
        "title":     il.NewTextFieldConfig("title", "Article headline"),
        "body":      il.NewTextFieldConfig("body", "Full article body text"),
        "image_url": il.NewTextFieldConfig("image_url", "URL of the embedded article image"),
    },
})

imageURL := extraction.Results[0].Fields["image_url"]

// Step 2: Optimize the extracted image for web display
imageResult, err := client.Transform(il.TransformRequest{
    File: il.NewFileFromURL("article-image.jpg", imageURL),
    Operations: []il.TransformOperation{
        il.NewResizeOperation(800, 600, "cover"),
        il.NewConvertOperation("webp"),
    },
})

Related Recipes

Start building in minutes

Free trial credits included. No credit card required.