From 5d9a7e7f79bfa12ac2ef3de99271f528cd22c0a1 Mon Sep 17 00:00:00 2001 From: Google Team Member Date: Fri, 24 Oct 2025 19:14:29 -0700 Subject: [PATCH] feat: enable persistent browser sessions in the computer use sample The computer_use sample now supports launching with a `user_data_dir` to maintain browser state across runs. The sample agent is updated to use a shared temporary directory for the browser profile, preserving login sessions and other data. PiperOrigin-RevId: 823749082 --- contributing/samples/computer_use/agent.py | 24 +++++--- .../samples/computer_use/playwright.py | 57 +++++++++++++++---- 2 files changed, 61 insertions(+), 20 deletions(-) diff --git a/contributing/samples/computer_use/agent.py b/contributing/samples/computer_use/agent.py index 5fed9aa2..00199501 100755 --- a/contributing/samples/computer_use/agent.py +++ b/contributing/samples/computer_use/agent.py @@ -12,13 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import tempfile + from google.adk import Agent -from google.adk.models.google_llm import Gemini from google.adk.tools.computer_use.computer_use_toolset import ComputerUseToolset -from typing_extensions import override from .playwright import PlaywrightComputer +# Define user_data_dir path +profile_name = 'browser_profile_for_adk' +profile_path = os.path.join(tempfile.gettempdir(), profile_name) +os.makedirs(profile_path, exist_ok=True) + +computer_with_profile = PlaywrightComputer( + screen_size=(1280, 936), + user_data_dir=profile_path, +) + +# Create agent with the toolset using the new computer instance root_agent = Agent( model='gemini-2.5-computer-use-preview-10-2025', name='hello_world_agent', @@ -26,10 +38,6 @@ root_agent = Agent( 'computer use agent that can operate a browser on a computer to finish' ' user tasks' ), - instruction=""" - you are a computer use agent - """, - tools=[ - ComputerUseToolset(computer=PlaywrightComputer(screen_size=(1280, 936))) - ], + instruction=""" you are a computer use agent """, + tools=[ComputerUseToolset(computer=computer_with_profile)], ) diff --git a/contributing/samples/computer_use/playwright.py b/contributing/samples/computer_use/playwright.py index 64ad54fd..1df542cc 100644 --- a/contributing/samples/computer_use/playwright.py +++ b/contributing/samples/computer_use/playwright.py @@ -14,6 +14,7 @@ import asyncio import time from typing import Literal +from typing import Optional from google.adk.tools.computer_use.base_computer import BaseComputer from google.adk.tools.computer_use.base_computer import ComputerEnvironment @@ -79,29 +80,61 @@ class PlaywrightComputer(BaseComputer): initial_url: str = "https://www.google.com", search_engine_url: str = "https://www.google.com", highlight_mouse: bool = False, + user_data_dir: Optional[str] = None, ): self._initial_url = initial_url self._screen_size = screen_size self._search_engine_url = search_engine_url self._highlight_mouse = highlight_mouse + self._user_data_dir = user_data_dir @override async def initialize(self): print("Creating session...") self._playwright = await async_playwright().start() - self._browser = await self._playwright.chromium.launch( - args=["--disable-blink-features=AutomationControlled"], - headless=False, - ) - self._context = await self._browser.new_context( - viewport={ - "width": self._screen_size[0], - "height": self._screen_size[1], - } - ) - self._page = await self._context.new_page() - await self._page.goto(self._initial_url) + # Define common arguments for both launch types + browser_args = [ + "--disable-blink-features=AutomationControlled", + "--disable-gpu", + ] + + if self._user_data_dir: + termcolor.cprint( + f"Starting playwright with persistent profile: {self._user_data_dir}", + color="yellow", + attrs=["bold"], + ) + # Use a persistent context if user_data_dir is provided + self._context = await self._playwright.chromium.launch_persistent_context( + self._user_data_dir, + headless=False, + args=browser_args, + ) + self._browser = self._context.browser + else: + termcolor.cprint( + "Starting playwright with a temporary profile.", + color="yellow", + attrs=["bold"], + ) + # Launch a temporary browser instance if user_data_dir is not provided + self._browser = await self._playwright.chromium.launch( + args=browser_args, + headless=False, + ) + self._context = await self._browser.new_context() + + if not self._context.pages: + self._page = await self._context.new_page() + await self._page.goto(self._initial_url) + else: + self._page = self._context.pages[0] # Use existing page if any + + await self._page.set_viewport_size({ + "width": self._screen_size[0], + "height": self._screen_size[1], + }) termcolor.cprint( f"Started local playwright.", color="green",