Skip to main content
Deprecated: This function is deprecated and will be removed in the future.
Extracts a structured object from a web page.
/**
 * Extracts a structured object from a web page.
 *
 * @deprecated This function is deprecated and will be removed in the future.
 *
 * @param page - The Playwright Page object from which to extract the data.
 * @param options - Extraction settings. `label`, `entityName`, and
 *   `entitySchema` are required; every other field is optional.
 * @returns A promise that resolves to the extracted object. Per the declared
 *   type, property values are `string | null` and the whole result may be
 *   `null`.
 */
export declare function extractObjectFromPage(
  page: Page,
  options: {
    // NOTE(review): presumably an identifier for this extraction (the example
    // passes "book-extraction") — confirm against the SDK docs.
    label: string;
    // Name of the entity being extracted (the example passes "book").
    entityName: string;
    // Schema describing the object to extract; the example passes
    // `type: "object"` with `required` and `properties` entries.
    entitySchema: SimpleObjectSchema;
    // Optional: one of the two declared strategy types.
    strategy?: ImageStrategy | HtmlStrategy;
    // NOTE(review): presumably extra instructions for the extractor — confirm.
    prompt?: string;
    // Optional callback that receives the extraction result (possibly null)
    // and returns a list of strings.
    // NOTE(review): presumably the names of optional properties to treat as
    // invalid — confirm.
    optionalPropertiesInvalidator?: (
      result: Record<string, string | null> | null
    ) => string[];
    // NOTE(review): meaning not documented on this page — confirm.
    variantKey?: string;
    // NOTE(review): presumably a key for the backing extraction service — confirm.
    apiKey?: string;
  }
): Promise<Record<string, string | null> | null>;

Examples

import { extractObjectFromPage } from "@intuned/browser/optimized-extractors";

// `page` is assumed to be an existing Playwright Page; it is not created in
// this snippet.
// Navigate to the page for a single book before extracting from it.
await page.goto(
  "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html"
);
// Extract one "book" entity from the loaded page using the schema below.
const book = await extractObjectFromPage(page, {
  entityName: "book",
  label: "book-extraction",
  entitySchema: {
    type: "object",
    // All three properties are listed as required.
    required: ["name", "price", "reviews"],
    properties: {
      name: {
        type: "string",
        description: "book name",
      },
      price: {
        type: "string",
        description: "book price",
      },
      // Declared as a string even though it describes a count.
      reviews: {
        type: "string",
        description: "Number of reviews",
      },
    },
  },
});

// Print the extracted object.
console.log(book);

// output:
// { name: 'A Light in the Attic', price: '£51.77', reviews: '0' }

Arguments

page
Page
required
The Playwright Page object from which to extract the data.
options
object
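required
Extraction settings. `label`, `entityName`, and `entitySchema` are required; `strategy`, `prompt`, `optionalPropertiesInvalidator`, `variantKey`, and `apiKey` are optional.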

Returns: Promise<Record<string, string | null> | null>

A promise that resolves to the extracted object, or to `null` if no object is returned.