From cc24d616f80c0eba2b09239b621cf3d176f144ea Mon Sep 17 00:00:00 2001
From: "Xiang (Sean) Zhou"
Date: Thu, 9 Oct 2025 11:51:48 -0700
Subject: [PATCH] feat: Support ContentUnion as static instruction

PiperOrigin-RevId: 817278990
---
 src/google/adk/agents/llm_agent.py            |  17 +-
 src/google/adk/agents/llm_agent_config.py     |  11 +-
 .../adk/flows/llm_flows/instructions.py       |   5 +-
 .../flows/llm_flows/test_instructions.py      | 165 ++++++++++++++++++
 4 files changed, 190 insertions(+), 8 deletions(-)

diff --git a/src/google/adk/agents/llm_agent.py b/src/google/adk/agents/llm_agent.py
index 3bfbfea0..75921735 100644
--- a/src/google/adk/agents/llm_agent.py
+++ b/src/google/adk/agents/llm_agent.py
@@ -196,7 +196,7 @@ class LlmAgent(BaseAgent):
   or personality.
   """
 
-  static_instruction: Optional[types.Content] = None
+  static_instruction: Optional[types.ContentUnion] = None
   """Static instruction content sent literally as system instruction at the beginning.
 
   This field is for content that never changes and doesn't contain placeholders.
@@ -223,11 +223,20 @@ class LlmAgent(BaseAgent):
   For explicit caching control, configure context_cache_config at App level.
 
   **Content Support:**
-  Can contain text, files, binaries, or any combination as types.Content
-  supports multiple part types (text, inline_data, file_data, etc.).
+  Accepts types.ContentUnion which includes:
+  - str: Simple text instruction
+  - types.Content: Rich content object
+  - types.Part: Single part (text, inline_data, file_data, etc.)
+  - PIL.Image.Image: Image object
+  - types.File: File reference
+  - list[PartUnion]: List of parts
 
-  **Example:**
+  **Examples:**
   ```python
+  # Simple string instruction
+  static_instruction = "You are a helpful assistant."
+
+  # Rich content with files
   static_instruction = types.Content(
       role='user',
       parts=[
diff --git a/src/google/adk/agents/llm_agent_config.py b/src/google/adk/agents/llm_agent_config.py
index 4cb5dc2c..4203a592 100644
--- a/src/google/adk/agents/llm_agent_config.py
+++ b/src/google/adk/agents/llm_agent_config.py
@@ -35,6 +35,10 @@ class LlmAgentConfig(BaseAgentConfig):
 
   model_config = ConfigDict(
       extra='forbid',
+      # Allow arbitrary types to support types.ContentUnion for static_instruction.
+      # ContentUnion includes PIL.Image.Image which doesn't have Pydantic schema
+      # support, but we validate it at runtime using google.genai._transformers.t_content()
+      arbitrary_types_allowed=True,
   )
 
   agent_class: str = Field(
@@ -62,14 +66,15 @@ class LlmAgentConfig(BaseAgentConfig):
       )
   )
 
-  static_instruction: Optional[types.Content] = Field(
+  static_instruction: Optional[types.ContentUnion] = Field(
       default=None,
       description=(
           'Optional. LlmAgent.static_instruction. Static content sent literally'
           ' at position 0 without placeholder processing. When set, changes'
           ' instruction behavior to go to user content instead of'
-          ' system_instruction. Supports context caching and rich content'
-          ' (text, files, binaries).'
+          ' system_instruction. Supports context caching. Accepts'
+          ' types.ContentUnion (str, types.Content, types.Part,'
+          ' PIL.Image.Image, types.File, or list[PartUnion]).'
       ),
   )
 
diff --git a/src/google/adk/flows/llm_flows/instructions.py b/src/google/adk/flows/llm_flows/instructions.py
index 8c749962..587c1646 100644
--- a/src/google/adk/flows/llm_flows/instructions.py
+++ b/src/google/adk/flows/llm_flows/instructions.py
@@ -19,6 +19,7 @@ from __future__ import annotations
 from typing import AsyncGenerator
 from typing import TYPE_CHECKING
 
+from google.genai import _transformers
 from typing_extensions import override
 
 from ...agents.readonly_context import ReadonlyContext
@@ -84,7 +85,9 @@ class _InstructionsLlmRequestProcessor(BaseLlmRequestProcessor):
 
     # Handle static_instruction - add via append_instructions
     if agent.static_instruction:
-      llm_request.append_instructions(agent.static_instruction)
+      # Convert ContentUnion to Content using genai transformer
+      static_content = _transformers.t_content(agent.static_instruction)
+      llm_request.append_instructions(static_content)
 
     # Handle instruction based on whether static_instruction exists
     if agent.instruction and not agent.static_instruction:
diff --git a/tests/unittests/flows/llm_flows/test_instructions.py b/tests/unittests/flows/llm_flows/test_instructions.py
index 88bc13e0..d3fcdbf5 100644
--- a/tests/unittests/flows/llm_flows/test_instructions.py
+++ b/tests/unittests/flows/llm_flows/test_instructions.py
@@ -551,6 +551,56 @@ def test_static_instruction_field_exists(llm_backend):
   assert agent.static_instruction == static_content
 
 
+@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
+def test_static_instruction_supports_string(llm_backend):
+  """Test that static_instruction field supports simple strings."""
+  static_str = "This is a static instruction as a string"
+  agent = LlmAgent(name="test_agent", static_instruction=static_str)
+  assert agent.static_instruction == static_str
+  assert isinstance(agent.static_instruction, str)
+
+
+@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
+def test_static_instruction_supports_part(llm_backend):
+  """Test that static_instruction field supports types.Part."""
+  static_part = types.Part(text="This is a static instruction as Part")
+  agent = LlmAgent(name="test_agent", static_instruction=static_part)
+  assert agent.static_instruction == static_part
+  assert isinstance(agent.static_instruction, types.Part)
+
+
+@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
+def test_static_instruction_supports_file(llm_backend):
+  """Test that static_instruction field supports types.File."""
+  static_file = types.File(uri="gs://bucket/file.txt", mime_type="text/plain")
+  agent = LlmAgent(name="test_agent", static_instruction=static_file)
+  assert agent.static_instruction == static_file
+  assert isinstance(agent.static_instruction, types.File)
+
+
+@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
+def test_static_instruction_supports_list_of_parts(llm_backend):
+  """Test that static_instruction field supports list[PartUnion]."""
+  static_parts_list = [
+      types.Part(text="First part"),
+      types.Part(text="Second part"),
+  ]
+  agent = LlmAgent(name="test_agent", static_instruction=static_parts_list)
+  assert agent.static_instruction == static_parts_list
+  assert isinstance(agent.static_instruction, list)
+  assert len(agent.static_instruction) == 2
+
+
+@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
+def test_static_instruction_supports_list_of_strings(llm_backend):
+  """Test that static_instruction field supports list of strings."""
+  static_strings_list = ["First instruction", "Second instruction"]
+  agent = LlmAgent(name="test_agent", static_instruction=static_strings_list)
+  assert agent.static_instruction == static_strings_list
+  assert isinstance(agent.static_instruction, list)
+  assert all(isinstance(s, str) for s in agent.static_instruction)
+
+
 @pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
 def test_static_instruction_supports_multiple_parts(llm_backend):
   """Test that static_instruction supports multiple parts including files."""
@@ -607,6 +657,91 @@ async def test_static_instruction_added_to_contents(llm_backend):
   assert llm_request.config.system_instruction == "Static instruction content"
 
 
+@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
+@pytest.mark.asyncio
+async def test_static_instruction_string_added_to_system(llm_backend):
+  """Test that string static instructions are added to system_instruction."""
+  agent = LlmAgent(
+      name="test_agent", static_instruction="Static instruction as string"
+  )
+
+  invocation_context = await _create_invocation_context(agent)
+
+  llm_request = LlmRequest()
+
+  # Run the instruction processor
+  async for _ in request_processor.run_async(invocation_context, llm_request):
+    pass
+
+  # Static instruction should be added to system instructions, not contents
+  assert len(llm_request.contents) == 0
+  assert llm_request.config.system_instruction == "Static instruction as string"
+
+
+@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
+@pytest.mark.asyncio
+async def test_static_instruction_part_converted_to_system(llm_backend):
+  """Test that Part static instructions are converted and added to system_instruction."""
+  static_part = types.Part(text="Static instruction from Part")
+  agent = LlmAgent(name="test_agent", static_instruction=static_part)
+
+  invocation_context = await _create_invocation_context(agent)
+  llm_request = LlmRequest()
+
+  # Run the instruction processor
+  async for _ in request_processor.run_async(invocation_context, llm_request):
+    pass
+
+  # Part should be converted to Content and text extracted to system instruction
+  assert llm_request.config.system_instruction == "Static instruction from Part"
+
+
+@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
+@pytest.mark.asyncio
+async def test_static_instruction_list_of_parts_converted_to_system(
+    llm_backend,
+):
+  """Test that list of Parts is converted and added to system_instruction."""
+  static_parts_list = [
+      types.Part(text="First part"),
+      types.Part(text="Second part"),
+  ]
+  agent = LlmAgent(name="test_agent", static_instruction=static_parts_list)
+
+  invocation_context = await _create_invocation_context(agent)
+  llm_request = LlmRequest()
+
+  # Run the instruction processor
+  async for _ in request_processor.run_async(invocation_context, llm_request):
+    pass
+
+  # List of parts should be converted to Content with text extracted
+  assert llm_request.config.system_instruction == "First part\n\nSecond part"
+
+
+@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
+@pytest.mark.asyncio
+async def test_static_instruction_list_of_strings_converted_to_system(
+    llm_backend,
+):
+  """Test that list of strings is converted and added to system_instruction."""
+  static_strings_list = ["First instruction", "Second instruction"]
+  agent = LlmAgent(name="test_agent", static_instruction=static_strings_list)
+
+  invocation_context = await _create_invocation_context(agent)
+  llm_request = LlmRequest()
+
+  # Run the instruction processor
+  async for _ in request_processor.run_async(invocation_context, llm_request):
+    pass
+
+  # List of strings should be converted to Content with text extracted
+  assert (
+      llm_request.config.system_instruction
+      == "First instruction\n\nSecond instruction"
+  )
+
+
 @pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
 @pytest.mark.asyncio
 async def test_dynamic_instruction_without_static_goes_to_system(llm_backend):
@@ -658,6 +793,36 @@ async def test_dynamic_instruction_with_static_not_in_system(llm_backend):
   assert llm_request.contents[0].parts[0].text == "Dynamic instruction content"
 
 
+@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
+@pytest.mark.asyncio
+async def test_dynamic_instruction_with_string_static_not_in_system(
+    llm_backend,
+):
+  """Test that dynamic instructions go to user content when string static_instruction exists."""
+  agent = LlmAgent(
+      name="test_agent",
+      instruction="Dynamic instruction content",
+      static_instruction="Static instruction as string",
+  )
+
+  invocation_context = await _create_invocation_context(agent)
+
+  llm_request = LlmRequest()
+
+  # Run the instruction processor
+  async for _ in request_processor.run_async(invocation_context, llm_request):
+    pass
+
+  # Static instruction should be in system instructions
+  assert llm_request.config.system_instruction == "Static instruction as string"
+
+  # Dynamic instruction should be added as user content
+  assert len(llm_request.contents) == 1
+  assert llm_request.contents[0].role == "user"
+  assert len(llm_request.contents[0].parts) == 1
+  assert llm_request.contents[0].parts[0].text == "Dynamic instruction content"
+
+
 @pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
 @pytest.mark.asyncio
 async def test_dynamic_instructions_added_to_user_content(llm_backend):