feat: enable persistent browser sessions in the computer use sample

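The computer_use sample's `PlaywrightComputer` now accepts an optional `user_data_dir`; when it is set, the browser is launched with a persistent context so that browser state is maintained across runs. The sample agent is updated to use a fixed profile directory (`browser_profile_for_adk`) under the system temp directory, preserving login sessions and other data.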

PiperOrigin-RevId: 823749082
This commit is contained in:
Google Team Member
2025-10-24 19:14:29 -07:00
committed by Copybara-Service
parent f8a9672b38
commit 5d9a7e7f79
2 changed files with 61 additions and 20 deletions
+16 -8
View File
@@ -12,13 +12,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import tempfile
from google.adk import Agent
from google.adk.models.google_llm import Gemini
from google.adk.tools.computer_use.computer_use_toolset import ComputerUseToolset
from typing_extensions import override
from .playwright import PlaywrightComputer
# Browser profile location: a fixed, named directory under the system temp
# dir. It is created up front (no error if it already exists) so the same
# profile path is handed to the browser on every run.
profile_name = 'browser_profile_for_adk'
profile_path = os.path.join(tempfile.gettempdir(), profile_name)
os.makedirs(profile_path, exist_ok=True)

# Computer instance that passes the profile directory as `user_data_dir`.
computer_with_profile = PlaywrightComputer(
    user_data_dir=profile_path,
    screen_size=(1280, 936),
)
# Create the agent; its toolset drives the profile-backed computer instance defined above
root_agent = Agent(
model='gemini-2.5-computer-use-preview-10-2025',
name='hello_world_agent',
@@ -26,10 +38,6 @@ root_agent = Agent(
'computer use agent that can operate a browser on a computer to finish'
' user tasks'
),
instruction="""
you are a computer use agent
""",
tools=[
ComputerUseToolset(computer=PlaywrightComputer(screen_size=(1280, 936)))
],
instruction=""" you are a computer use agent """,
tools=[ComputerUseToolset(computer=computer_with_profile)],
)
+45 -12
View File
@@ -14,6 +14,7 @@
import asyncio
import time
from typing import Literal
from typing import Optional
from google.adk.tools.computer_use.base_computer import BaseComputer
from google.adk.tools.computer_use.base_computer import ComputerEnvironment
@@ -79,29 +80,61 @@ class PlaywrightComputer(BaseComputer):
initial_url: str = "https://www.google.com",
search_engine_url: str = "https://www.google.com",
highlight_mouse: bool = False,
user_data_dir: Optional[str] = None,
):
self._initial_url = initial_url
self._screen_size = screen_size
self._search_engine_url = search_engine_url
self._highlight_mouse = highlight_mouse
self._user_data_dir = user_data_dir
@override
async def initialize(self):
print("Creating session...")
self._playwright = await async_playwright().start()
self._browser = await self._playwright.chromium.launch(
args=["--disable-blink-features=AutomationControlled"],
headless=False,
)
self._context = await self._browser.new_context(
viewport={
"width": self._screen_size[0],
"height": self._screen_size[1],
}
)
self._page = await self._context.new_page()
await self._page.goto(self._initial_url)
# Define common arguments for both launch types
browser_args = [
"--disable-blink-features=AutomationControlled",
"--disable-gpu",
]
if self._user_data_dir:
termcolor.cprint(
f"Starting playwright with persistent profile: {self._user_data_dir}",
color="yellow",
attrs=["bold"],
)
# Use a persistent context if user_data_dir is provided
self._context = await self._playwright.chromium.launch_persistent_context(
self._user_data_dir,
headless=False,
args=browser_args,
)
self._browser = self._context.browser
else:
termcolor.cprint(
"Starting playwright with a temporary profile.",
color="yellow",
attrs=["bold"],
)
# Launch a temporary browser instance if user_data_dir is not provided
self._browser = await self._playwright.chromium.launch(
args=browser_args,
headless=False,
)
self._context = await self._browser.new_context()
if not self._context.pages:
self._page = await self._context.new_page()
await self._page.goto(self._initial_url)
else:
self._page = self._context.pages[0] # Use existing page if any
await self._page.set_viewport_size({
"width": self._screen_size[0],
"height": self._screen_size[1],
})
termcolor.cprint(
f"Started local playwright.",
color="green",