Skip to content

Handle downloads in the Playwright browser (resolves issues #127 and #189)

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import asyncio
import logging
from typing import Any, Dict

from autogen_core import Component
import docker
Expand Down Expand Up @@ -87,21 +88,21 @@ async def create_container(self) -> Container:
)

client = docker.from_env()
return await asyncio.to_thread(
client.containers.create,
name=f"magentic-ui-headless-browser_{self._playwright_port}",
image="mcr.microsoft.com/playwright:v1.51.1-noble",
detach=True,
auto_remove=True,
ports={
container_config: Dict[str, Any] = {
"name": f"magentic-ui-headless-browser_{self._playwright_port}",
"image": "mcr.microsoft.com/playwright:v1.51.1-noble",
"detach": True,
"auto_remove": True,
"ports": {
f"{self._playwright_port}/tcp": self._playwright_port,
},
command=[
"command": [
"/bin/sh",
"-c",
f"npx -y playwright@1.51 run-server --port {self._playwright_port} --host 0.0.0.0",
],
)
}
return await asyncio.to_thread(client.containers.create, **container_config)

def _to_config(self) -> HeadlessBrowserConfig:
return HeadlessBrowserConfig(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

from typing import Optional, Any, Dict
from pathlib import Path
import os

from autogen_core import Component
from playwright.async_api import BrowserContext, Browser
from playwright.async_api import BrowserContext, Browser, Route
from pydantic import BaseModel

from playwright.async_api import async_playwright, Playwright
Expand Down Expand Up @@ -99,15 +100,19 @@ async def _start(self) -> None:
# Ensure the browser data directory exists
Path(self._browser_data_dir).mkdir(parents=True, exist_ok=True)

# Launch persistent context
# Launch persistent context with automatic downloads
self._context = await self._playwright.chromium.launch_persistent_context(
self._browser_data_dir,
accept_downloads=self._enable_downloads,
accept_downloads=True, # Always accept downloads
**launch_options,
args=["--disable-extensions", "--disable-file-system"],
env={},
chromium_sandbox=True,
)

# Set up download behavior for persistent context
if self._enable_downloads:
await self._context.route("**/*", self._handle_download)
else:
# Launch regular browser and create new context
self._browser = await self._playwright.chromium.launch(
Expand All @@ -117,11 +122,47 @@ async def _start(self) -> None:
env={} if self._headless else {"DISPLAY": ":0"},
)

# Create context with automatic downloads
self._context = await self._browser.new_context(
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0",
accept_downloads=self._enable_downloads,
accept_downloads=True, # Always accept downloads
)

# Set up download behavior for regular context
if self._enable_downloads:
await self._context.route("**/*", self._handle_download)

async def _handle_download(self, route: Route) -> None:
    """
    Intercept a request and keep a copy of the response when it is a file download.

    The request is performed exactly once with ``route.fetch()``. If the response
    has a ``Content-Disposition`` header that names a file and a browser data
    directory is configured, the body is written to
    ``<browser_data_dir>/.webby/<filename>``. In every case the already-fetched
    response is handed back to the page with ``route.fulfill`` so the request is
    not sent to the server a second time.

    Args:
        route: The intercepted Playwright route.
    """
    # Function-scope import: only this handler needs the stdlib header parser.
    from email.message import Message

    response = await route.fetch()
    content_disposition = response.headers.get("content-disposition")

    if content_disposition and self._browser_data_dir:
        # Let the stdlib parse the header: it strips quotes, ignores trailing
        # parameters (e.g. "; size=10") and decodes RFC 2231 "filename*=" values.
        header = Message()
        header["content-disposition"] = content_disposition
        raw_name = header.get_filename() or ""

        # The header is server-controlled (untrusted). Keep only the final path
        # component so "../x", "C:\\x" or "/etc/x" cannot escape the .webby dir.
        safe_name = os.path.basename(raw_name.replace("\\", "/"))

        if safe_name not in ("", ".", ".."):
            webby_dir = os.path.join(self._browser_data_dir, ".webby")
            os.makedirs(webby_dir, exist_ok=True)

            # Read the body before opening the file so a failed read does not
            # leave an empty file behind.
            body = await response.body()
            with open(os.path.join(webby_dir, safe_name), "wb") as f:
                f.write(body)

    # Reuse the fetched response instead of route.continue_(), which would
    # issue the same request again (duplicating non-idempotent requests).
    await route.fulfill(response=response)

async def _close(self) -> None:
"""
Close the browser resource.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import asyncio
import logging
from typing import Any, Dict, Tuple

from pathlib import Path
import secrets
Expand Down Expand Up @@ -101,7 +102,7 @@ def __init__(
)
self._docker_name = f"magentic-ui-vnc-browser_{self._playwright_websocket_path}_{self._novnc_port}"

def _get_available_port(self) -> tuple[int, socket.socket]:
def _get_available_port(self) -> Tuple[int, socket.socket]:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind(("127.0.0.1", 0))
port = s.getsockname()[1]
Expand Down Expand Up @@ -160,27 +161,26 @@ async def create_container(self) -> Container:
)

client = docker.from_env()

return await asyncio.to_thread(
client.containers.create,
name=self._docker_name,
image=self._image,
detach=True,
auto_remove=True,
network=self._network_name if self._inside_docker else None,
ports={
container_config: Dict[str, Any] = {
"name": self._docker_name,
"image": self._image,
"detach": True,
"auto_remove": True,
"network": self._network_name if self._inside_docker else None,
"ports": {
f"{self._playwright_port}/tcp": self._playwright_port,
f"{self._novnc_port}/tcp": self._novnc_port,
},
volumes={
"volumes": {
str(self._bind_dir.resolve()): {"bind": "/workspace", "mode": "rw"}
},
environment={
"environment": {
"PLAYWRIGHT_WS_PATH": self._playwright_websocket_path,
"PLAYWRIGHT_PORT": str(self._playwright_port),
"NO_VNC_PORT": str(self._novnc_port),
},
)
}
return await asyncio.to_thread(client.containers.create, **container_config)

def _to_config(self) -> VncDockerPlaywrightBrowserConfig:
return VncDockerPlaywrightBrowserConfig(
Expand Down
Loading