from playwright.async_api import Page
from typing import TypedDict
from crawl4ai import (
AsyncWebCrawler,
CrawlerRunConfig,
DefaultMarkdownGenerator,
PruningContentFilter,
CrawlResult,
)
from utils.config import get_browser_config
class Params(TypedDict):
    """Keyword parameters accepted by ``automation``; currently declares no keys."""
async def automation(
    page: Page, params: Params | None = None, **_kwargs
) -> str:
    """Crawl a fixed URL with crawl4ai and return its raw markdown.

    A dedicated ``AsyncWebCrawler`` is started from the project's browser
    configuration and closed again before returning.

    Args:
        page: Playwright page supplied by the caller. It is not used here;
            the crawl runs through its own ``AsyncWebCrawler`` instance.
        params: Optional parameters. ``Params`` declares no keys and the
            value is not read.
        **_kwargs: Extra keyword arguments, accepted and ignored.

    Returns:
        The ``raw_markdown`` string of the crawl result.

    Raises:
        RuntimeError: If the crawl reports failure or yields no markdown.
    """
    target_url = "https://www.helloworld.org"

    # The run configuration does not depend on the crawler instance, so it
    # is built before the browser is started.
    crawler_config = CrawlerRunConfig(
        markdown_generator=DefaultMarkdownGenerator(
            content_filter=PruningContentFilter(),
        ),
    )

    async with AsyncWebCrawler(config=get_browser_config()) as crawler:
        result: CrawlResult = await crawler.arun(
            url=target_url, config=crawler_config
        )

    # Surface crawl failures explicitly instead of letting the attribute
    # access below fail with an opaque AttributeError on ``None``.
    if not result.success:
        raise RuntimeError(
            f"Crawl of {target_url} failed: {result.error_message}"
        )
    if result.markdown is None:
        raise RuntimeError(f"Crawl of {target_url} produced no markdown")

    # NOTE(review): a PruningContentFilter is configured above, yet the
    # unfiltered ``raw_markdown`` is returned; the filtered output is
    # presumably exposed as ``fit_markdown`` -- confirm which is intended.
    return result.markdown.raw_markdown