feat: Adds data model to support UserSimulation

Details: - Introduces the concept of a `ConversationScenario` to represent a scenario that the user simulator is supposed to follow. - Introduces a `UserSimulator` interface that one should implement. The UserSimulator interface will be integrated with LocalEvalService in subsequent PRs. PiperOrigin-RevId: 816883699
2026-03-30 10:57:20 -07:00 · 2025-10-08 15:23:21 -07:00
parent 2efaa57575
commit cbe60c47aa
5 changed files with 277 additions and 2 deletions
@@ -0,0 +1,63 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from typing import Union
+
+from google.genai import types as genai_types
+from pydantic import Field
+
+from .common import EvalBaseModel
+
+
+class ConversationScenario(EvalBaseModel):
+  """Scenario for a conversation between a simulated user and the Agent."""
+
+  starting_prompt: Union[str, genai_types.Content]  # Plain text or a Content.
+  """Starting prompt for the conversation.
+
+  This prompt acts as the first user message that is given to the Agent. Any
+  subsequent user messages are obtained by the system that is simulating the
+  user.
+  """
+
+  conversation_plan: str  # Free-form text; see the example below.
+  """A plan that user simulation system needs to follow as it plays out the conversation.
+
+  Example:
+  For a Travel Agent that has tools that let it book a flight and car, a sample
+  starting prompt could be:
+
+  `I need to book a flight.`
+
+  A conversation plan could look like:
+
+  First, you want to book a one-way flight from SFO to LAX for next Tuesday.
+  You prefer a morning flight and your budget is under $150. If the agent finds
+  a valid flight, confirm the booking. Once confirmed, your next goal is to rent
+  a standard-size car for three days from the airport. Once both tasks are done,
+  your overall goal is complete.
+  """
+
+
+class ConversationScenarios(EvalBaseModel):
+  """A simple container for the list of ConversationScenario.
+
+  Mainly serves the purpose of helping with serialization and deserialization.
+  """
+
+  scenarios: list[ConversationScenario] = Field(  # Empty list when omitted.
+      default_factory=list, description="""A list of ConversationScenario."""
+  )
@@ -24,6 +24,7 @@ from typing_extensions import TypeAlias

 from .app_details import AppDetails
 from .common import EvalBaseModel
+from .conversation_scenarios import ConversationScenario
 from .eval_rubrics import Rubric


@@ -119,14 +120,29 @@ class SessionInput(EvalBaseModel):
  """The state of the session."""


+StaticConversation: TypeAlias = list[Invocation]  # Alias only; no new type.
+"""A conversation where user's query for each invocation is already specified."""
+
+
 class EvalCase(EvalBaseModel):
  """An eval case."""

  eval_id: str
  """Unique identifier for the evaluation case."""

-  conversation: list[Invocation]
-  """A conversation between the user and the Agent. The conversation can have any number of invocations."""
+  conversation: Optional[StaticConversation] = None
+  """A static conversation between the user and the Agent.
+
+   While creating an eval case you should specify either a `conversation` or a
+  `conversation_scenario`, but not both.
+  """
+
+  conversation_scenario: Optional[ConversationScenario] = None
+  """A conversation scenario that should be used by a UserSimulator.
+
+  While creating an eval case you should specify either a `conversation` or a
+  `conversation_scenario`, but not both.
+  """

  session_input: Optional[SessionInput] = None
  """Session input that will be passed on to the Agent during eval.
@@ -26,6 +26,7 @@ from pydantic import Field
 from ..evaluation.eval_metrics import EvalMetric
 from .eval_metrics import BaseCriterion
 from .eval_metrics import Threshold
+from .user_simulator import BaseUserSimulatorConfig

 logger = logging.getLogger("google_adk." + __name__)

@@ -70,6 +71,11 @@ the third one uses `LlmAsAJudgeCriterion`.
 """,
  )

+  user_simulator_config: Optional[BaseUserSimulatorConfig] = Field(
+      default=None,
+      description="""Config to be used by the user simulator.""",
+  )
+

 _DEFAULT_EVAL_CONFIG = EvalConfig(
    criteria={"tool_trajectory_avg_score": 1.0, "response_match_score": 0.8}
@@ -0,0 +1,88 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from typing import ClassVar
+from typing import Optional
+
+from google.genai import types as genai_types
+from pydantic import Field
+from typing_extensions import override
+
+from ..events.event import Event
+from ..utils.feature_decorator import experimental
+from .evaluator import Evaluator
+from .user_simulator import BaseUserSimulatorConfig
+from .user_simulator import NextUserMessage
+from .user_simulator import UserSimulator
+
+
+class LlmBackedUserSimulatorConfig(BaseUserSimulatorConfig):
+  """Contains configurations required by an LLM backed user simulator."""
+
+  model: str = Field(
+      default="gemini-2.5-flash",
+      description="The model to use for user simulation.",
+  )
+  # NOTE(review): pydantic v2 reserves `model_config` for settings; confirm name.
+  model_config: Optional[genai_types.GenerateContentConfig] = Field(
+      default_factory=genai_types.GenerateContentConfig,  # instance, not class
+      description="The configuration for the model.",
+  )
+
+  max_allowed_invocations: int = Field(
+      default=20,  # -1 disables the limit (see description).
+      description="""Maximum number of invocations allowed by the simulated
+interaction.  This property allows us to stop a run-off conversation, where the
+agent and the user simulator get into an never ending loop.
+
+(Not recommended)If you don't want a limit, you can set the value to -1.
+      """,
+  )
+
+
+@experimental
+class LlmBackedUserSimulator(UserSimulator):
+  """A UserSimulator that uses an LLM to generate messages on behalf of the user."""
+
+  config_type: ClassVar[type[LlmBackedUserSimulatorConfig]] = (
+      LlmBackedUserSimulatorConfig
+  )
+
+  def __init__(self, *, config: BaseUserSimulatorConfig):  # keyword-only arg
+    super().__init__(config, config_type=LlmBackedUserSimulator.config_type)
+
+  @override
+  async def get_next_user_message(
+      self,
+      conversation_plan: str,
+      events: list[Event],
+  ) -> NextUserMessage:
+    """Returns the next user message to send to the agent with help from an LLM.
+
+    Args:
+      conversation_plan: A plan that user simulation system needs to follow as
+        it plays out the conversation.
+      events: The unaltered conversation history between the user and the
+        agent(s) under evaluation.
+    """
+    raise NotImplementedError()  # Stub: always raises in this version.
+
+  @override
+  def get_simulation_evaluator(
+      self,
+  ) -> Evaluator:
+    """Returns an Evaluator that evaluates if the simulation was successful or not."""
+    raise NotImplementedError()  # Stub: always raises in this version.
@@ -0,0 +1,102 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from abc import ABC
+import enum
+from typing import Optional
+
+from google.genai import types as genai_types
+from pydantic import alias_generators
+from pydantic import BaseModel
+from pydantic import ConfigDict
+from pydantic import Field
+from pydantic import ValidationError
+
+from ..events.event import Event
+from ..utils.feature_decorator import experimental
+from .common import EvalBaseModel
+from .evaluator import Evaluator
+
+
+class BaseUserSimulatorConfig(BaseModel):
+  """Base class for configurations pertaining to User Simulator."""
+
+  model_config = ConfigDict(
+      alias_generator=alias_generators.to_camel,  # camelCase aliases
+      populate_by_name=True,  # field names are accepted as well as aliases
+      extra="allow",  # unknown keys are kept rather than rejected
+  )
+
+
+class Status(enum.Enum):  # Plain Enum; each member's value is a lowercase str.
+  """The resulting status of get_next_user_message()."""
+
+  SUCCESS = "success"
+  TURN_LIMIT_REACHED = "turn_limit_reached"
+  STOP_SIGNAL_DETECTED = "stop_signal_detected"
+  NO_MESSAGE_GENERATED = "no_message_generated"
+
+
+class NextUserMessage(EvalBaseModel):  # Result of get_next_user_message().
+  status: Status = Field(
+      description="""The resulting status of `get_next_user_message()`.
+
+The caller of `get_next_user_message()` should inspect this field to determine
+if the user simulator was able to successfully generate a message or why it was
+not able to do so."""
+  )
+
+  user_message: Optional[genai_types.Content] = Field(  # May be omitted.
+      default=None, description="""The next user message."""
+  )
+
+
+@experimental
+class UserSimulator(ABC):
+  """A user simulator for the purposes of automating interaction with an Agent."""
+
+  def __init__(
+      self,
+      config: BaseUserSimulatorConfig,
+      config_type: type[BaseUserSimulatorConfig],
+  ):
+    # Re-validate the dumped config as `config_type`, the specific type needed
+    # by the implementing class; a ValidationError is re-raised as ValueError.
+    try:
+      self._config = config_type.model_validate(config.model_dump())
+    except ValidationError as e:
+      raise ValueError(f"Expect config of type `{config_type}`.") from e
+
+  async def get_next_user_message(
+      self,
+      conversation_plan: str,
+      events: list[Event],
+  ) -> NextUserMessage:
+    """Returns the next user message to send to the agent.
+
+    Args:
+      conversation_plan: A plan that user simulation system needs to follow as
+        it plays out the conversation.
+      events: The unaltered conversation history between the user and the
+        agent(s) under evaluation.
+    """
+    raise NotImplementedError()  # Implementations are expected to override.
+
+  def get_simulation_evaluator(
+      self,
+  ) -> Evaluator:
+    """Returns an instance of an Evaluator that evaluates if the simulation was successful or not."""
+    raise NotImplementedError()  # Implementations are expected to override.