feat: Support ContentUnion as static instruction

PiperOrigin-RevId: 817278990
This commit is contained in:
Xiang (Sean) Zhou
2025-10-09 11:51:48 -07:00
committed by Copybara-Service
parent 0aede9f1a1
commit cc24d616f8
4 changed files with 190 additions and 8 deletions
+13 -4
View File
@@ -196,7 +196,7 @@ class LlmAgent(BaseAgent):
or personality.
"""
static_instruction: Optional[types.Content] = None
static_instruction: Optional[types.ContentUnion] = None
"""Static instruction content sent literally as system instruction at the beginning.
This field is for content that never changes and doesn't contain placeholders.
@@ -223,11 +223,20 @@ class LlmAgent(BaseAgent):
For explicit caching control, configure context_cache_config at App level.
**Content Support:**
Can contain text, files, binaries, or any combination as types.Content
supports multiple part types (text, inline_data, file_data, etc.).
Accepts types.ContentUnion which includes:
- str: Simple text instruction
- types.Content: Rich content object
- types.Part: Single part (text, inline_data, file_data, etc.)
- PIL.Image.Image: Image object
- types.File: File reference
- list[PartUnion]: List of parts
**Example:**
**Examples:**
```python
# Simple string instruction
static_instruction = "You are a helpful assistant."
# Rich content with files
static_instruction = types.Content(
role='user',
parts=[
+8 -3
View File
@@ -35,6 +35,10 @@ class LlmAgentConfig(BaseAgentConfig):
model_config = ConfigDict(
extra='forbid',
# Allow arbitrary types to support types.ContentUnion for static_instruction.
# ContentUnion includes PIL.Image.Image which doesn't have Pydantic schema
# support, but we validate it at runtime using google.genai._transformers.t_content()
arbitrary_types_allowed=True,
)
agent_class: str = Field(
@@ -62,14 +66,15 @@ class LlmAgentConfig(BaseAgentConfig):
)
)
static_instruction: Optional[types.Content] = Field(
static_instruction: Optional[types.ContentUnion] = Field(
default=None,
description=(
'Optional. LlmAgent.static_instruction. Static content sent literally'
' at position 0 without placeholder processing. When set, changes'
' instruction behavior to go to user content instead of'
' system_instruction. Supports context caching and rich content'
' (text, files, binaries).'
' system_instruction. Supports context caching. Accepts'
' types.ContentUnion (str, types.Content, types.Part,'
' PIL.Image.Image, types.File, or list[PartUnion]).'
),
)
@@ -19,6 +19,7 @@ from __future__ import annotations
from typing import AsyncGenerator
from typing import TYPE_CHECKING
from google.genai import _transformers
from typing_extensions import override
from ...agents.readonly_context import ReadonlyContext
@@ -84,7 +85,9 @@ class _InstructionsLlmRequestProcessor(BaseLlmRequestProcessor):
# Handle static_instruction - add via append_instructions
if agent.static_instruction:
llm_request.append_instructions(agent.static_instruction)
# Convert ContentUnion to Content using genai transformer
static_content = _transformers.t_content(agent.static_instruction)
llm_request.append_instructions(static_content)
# Handle instruction based on whether static_instruction exists
if agent.instruction and not agent.static_instruction:
@@ -551,6 +551,56 @@ def test_static_instruction_field_exists(llm_backend):
assert agent.static_instruction == static_content
@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
def test_static_instruction_supports_string(llm_backend):
  """Verify a plain str is accepted verbatim as static_instruction."""
  instruction_text = "This is a static instruction as a string"
  agent = LlmAgent(name="test_agent", static_instruction=instruction_text)
  # The agent must store the string untouched, without coercing it to Content.
  assert agent.static_instruction == instruction_text
  assert isinstance(agent.static_instruction, str)
@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
def test_static_instruction_supports_part(llm_backend):
  """Verify a single types.Part is accepted as static_instruction."""
  part = types.Part(text="This is a static instruction as Part")
  agent = LlmAgent(name="test_agent", static_instruction=part)
  # The Part must be stored as-is, not wrapped into a Content object.
  assert agent.static_instruction == part
  assert isinstance(agent.static_instruction, types.Part)
@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
def test_static_instruction_supports_file(llm_backend):
  """Verify a types.File reference is accepted as static_instruction."""
  file_ref = types.File(uri="gs://bucket/file.txt", mime_type="text/plain")
  agent = LlmAgent(name="test_agent", static_instruction=file_ref)
  # The File reference must be preserved unchanged on the agent.
  assert agent.static_instruction == file_ref
  assert isinstance(agent.static_instruction, types.File)
@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
def test_static_instruction_supports_list_of_parts(llm_backend):
  """Verify a list[PartUnion] of Parts is accepted as static_instruction."""
  parts = [types.Part(text="First part"), types.Part(text="Second part")]
  agent = LlmAgent(name="test_agent", static_instruction=parts)
  # The list must be stored as-is with both elements intact.
  assert agent.static_instruction == parts
  assert isinstance(agent.static_instruction, list)
  assert len(agent.static_instruction) == 2
@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
def test_static_instruction_supports_list_of_strings(llm_backend):
  """Verify a list of plain strings is accepted as static_instruction."""
  instructions = ["First instruction", "Second instruction"]
  agent = LlmAgent(name="test_agent", static_instruction=instructions)
  # The list must be kept as-is, with every element still a str.
  assert agent.static_instruction == instructions
  assert isinstance(agent.static_instruction, list)
  assert all(isinstance(item, str) for item in agent.static_instruction)
@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
def test_static_instruction_supports_multiple_parts(llm_backend):
"""Test that static_instruction supports multiple parts including files."""
@@ -607,6 +657,91 @@ async def test_static_instruction_added_to_contents(llm_backend):
assert llm_request.config.system_instruction == "Static instruction content"
@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
@pytest.mark.asyncio
async def test_static_instruction_string_added_to_system(llm_backend):
  """A str static_instruction must land in system_instruction, not contents."""
  agent = LlmAgent(
      name="test_agent", static_instruction="Static instruction as string"
  )
  ctx = await _create_invocation_context(agent)
  request = LlmRequest()

  # Drive the instruction processor to completion; events are irrelevant here.
  async for _ in request_processor.run_async(ctx, request):
    pass

  # Nothing may leak into the conversation contents; the string goes to the
  # system instruction verbatim.
  assert len(request.contents) == 0
  assert request.config.system_instruction == "Static instruction as string"
@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
@pytest.mark.asyncio
async def test_static_instruction_part_converted_to_system(llm_backend):
  """A Part static_instruction must be converted into system_instruction text."""
  agent = LlmAgent(
      name="test_agent",
      static_instruction=types.Part(text="Static instruction from Part"),
  )
  ctx = await _create_invocation_context(agent)
  request = LlmRequest()

  # Drive the instruction processor; it converts the Part to Content and
  # extracts its text into the system instruction.
  async for _ in request_processor.run_async(ctx, request):
    pass

  assert request.config.system_instruction == "Static instruction from Part"
@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
@pytest.mark.asyncio
async def test_static_instruction_list_of_parts_converted_to_system(
    llm_backend,
):
  """A list of Parts must be merged into a single system_instruction string."""
  parts = [types.Part(text="First part"), types.Part(text="Second part")]
  agent = LlmAgent(name="test_agent", static_instruction=parts)
  ctx = await _create_invocation_context(agent)
  request = LlmRequest()

  # Drive the instruction processor to completion.
  async for _ in request_processor.run_async(ctx, request):
    pass

  # The parts' texts are joined with a blank line in between.
  assert request.config.system_instruction == "First part\n\nSecond part"
@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
@pytest.mark.asyncio
async def test_static_instruction_list_of_strings_converted_to_system(
    llm_backend,
):
  """A list of strings must be merged into a single system_instruction string."""
  agent = LlmAgent(
      name="test_agent",
      static_instruction=["First instruction", "Second instruction"],
  )
  ctx = await _create_invocation_context(agent)
  request = LlmRequest()

  # Drive the instruction processor to completion.
  async for _ in request_processor.run_async(ctx, request):
    pass

  # The strings are joined with a blank line in between.
  expected = "First instruction\n\nSecond instruction"
  assert request.config.system_instruction == expected
@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
@pytest.mark.asyncio
async def test_dynamic_instruction_without_static_goes_to_system(llm_backend):
@@ -658,6 +793,36 @@ async def test_dynamic_instruction_with_static_not_in_system(llm_backend):
assert llm_request.contents[0].parts[0].text == "Dynamic instruction content"
@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
@pytest.mark.asyncio
async def test_dynamic_instruction_with_string_static_not_in_system(
    llm_backend,
):
  """With a str static_instruction set, dynamic instruction becomes user content."""
  agent = LlmAgent(
      name="test_agent",
      instruction="Dynamic instruction content",
      static_instruction="Static instruction as string",
  )
  ctx = await _create_invocation_context(agent)
  request = LlmRequest()

  # Drive the instruction processor to completion.
  async for _ in request_processor.run_async(ctx, request):
    pass

  # The static string owns the system instruction slot...
  assert request.config.system_instruction == "Static instruction as string"
  # ...which pushes the dynamic instruction into a single user-role content.
  assert len(request.contents) == 1
  first = request.contents[0]
  assert first.role == "user"
  assert len(first.parts) == 1
  assert first.parts[0].text == "Dynamic instruction content"
@pytest.mark.parametrize("llm_backend", ["GOOGLE_AI", "VERTEX"])
@pytest.mark.asyncio
async def test_dynamic_instructions_added_to_user_content(llm_backend):