feat: Adds data model to support UserSimulation

Details:
- Introduces the concept of a `ConversationScenario`, which represents the scenario that a user simulator is supposed to follow.
- Introduces a `UserSimulator` interface that concrete user simulators should implement. The `UserSimulator` interface will be integrated with LocalEvalService in subsequent PRs.
PiperOrigin-RevId: 816883699
This commit is contained in:
Ankur Sharma
2025-10-08 15:23:21 -07:00
committed by Copybara-Service
parent 2efaa57575
commit cbe60c47aa
5 changed files with 277 additions and 2 deletions
@@ -0,0 +1,63 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
from typing import Union
from google.genai import types as genai_types
from pydantic import Field
from .common import EvalBaseModel
class ConversationScenario(EvalBaseModel):
  """Describes one conversation between a simulated user and the Agent.

  A scenario pairs the opening user message (`starting_prompt`) with a textual
  `conversation_plan` that the user simulation system follows when producing
  the later user messages.
  """

  starting_prompt: Union[str, genai_types.Content]
  """Starting prompt for the conversation.

  This prompt acts as the first user message that is given to the Agent. Any
  subsequent user messages are obtained by the system that is simulating the
  user.
  """

  conversation_plan: str
  """A plan that user simulation system needs to follow as it plays out the conversation.

  Example:
  For a Travel Agent that has tools that let it book a flight and car, a sample
  starting prompt could be:
  `I need to book a flight.`
  A conversation plan could look like:
  First, you want to book a one-way flight from SFO to LAX for next Tuesday.
  You prefer a morning flight and your budget is under $150. If the agent finds
  a valid flight, confirm the booking. Once confirmed, your next goal is to rent
  a standard-size car for three days from the airport. Once both tasks are done,
  your overall goal is complete.
  """
class ConversationScenarios(EvalBaseModel):
  """Wrapper model around a list of `ConversationScenario` objects.

  It exists mainly so that a collection of scenarios can be serialized and
  deserialized as a single unit.
  """

  # `default_factory=list` gives every instance its own empty list.
  scenarios: list[ConversationScenario] = Field(
      description="A list of ConversationScenario.",
      default_factory=list,
  )
+18 -2
View File
@@ -24,6 +24,7 @@ from typing_extensions import TypeAlias
from .app_details import AppDetails
from .common import EvalBaseModel
from .conversation_scenarios import ConversationScenario
from .eval_rubrics import Rubric
@@ -119,14 +120,29 @@ class SessionInput(EvalBaseModel):
"""The state of the session."""
# Type alias: a static conversation is represented as a list of `Invocation`.
StaticConversation: TypeAlias = list[Invocation]
"""A conversation where user's query for each invocation is already specified."""
class EvalCase(EvalBaseModel):
"""An eval case."""
eval_id: str
"""Unique identifier for the evaluation case."""
conversation: list[Invocation]
"""A conversation between the user and the Agent. The conversation can have any number of invocations."""
conversation: Optional[StaticConversation] = None
"""A static conversation between the user and the Agent.
While creating an eval case you should specify either a `conversation` or a
`conversation_scenario`, but not both.
"""
conversation_scenario: Optional[ConversationScenario] = None
"""A conversation scenario that should be used by a UserSimulator.
While creating an eval case you should specify either a `conversation` or a
`conversation_scenario`, but not both.
"""
session_input: Optional[SessionInput] = None
"""Session input that will be passed on to the Agent during eval.
+6
View File
@@ -26,6 +26,7 @@ from pydantic import Field
from ..evaluation.eval_metrics import EvalMetric
from .eval_metrics import BaseCriterion
from .eval_metrics import Threshold
from .user_simulator import BaseUserSimulatorConfig
logger = logging.getLogger("google_adk." + __name__)
@@ -70,6 +71,11 @@ the third one uses `LlmAsAJudgeCriterion`.
""",
)
user_simulator_config: Optional[BaseUserSimulatorConfig] = Field(
default=None,
description="""Config to be used by the user simulator.""",
)
_DEFAULT_EVAL_CONFIG = EvalConfig(
criteria={"tool_trajectory_avg_score": 1.0, "response_match_score": 0.8}
@@ -0,0 +1,88 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
from typing import ClassVar
from typing import Optional
from google.genai import types as genai_types
from pydantic import Field
from typing_extensions import override
from ..events.event import Event
from ..utils.feature_decorator import experimental
from .evaluator import Evaluator
from .user_simulator import BaseUserSimulatorConfig
from .user_simulator import NextUserMessage
from .user_simulator import UserSimulator
class LlmBackedUserSimulatorConfig(BaseUserSimulatorConfig):
  """Contains configurations required by an LLM backed user simulator.

  Attributes:
    model: The model to use for user simulation.
    model_config: The configuration for the model.
    max_allowed_invocations: Maximum number of invocations allowed in a
      simulated interaction; -1 means no limit (not recommended).
  """

  model: str = Field(
      default="gemini-2.5-flash",
      description="The model to use for user simulation.",
  )

  # The default is produced by `default_factory` so that every config gets its
  # own `GenerateContentConfig` instance. Passing the class object itself as
  # `default` would leave the field holding a type rather than a config.
  #
  # NOTE(review): `model_config` is also the attribute name pydantic v2
  # reserves for model configuration, so this declaration may not behave like
  # a regular field. The name is kept for interface compatibility; consider
  # renaming it (e.g. `model_configuration`) in a follow-up - TODO confirm.
  model_config: Optional[genai_types.GenerateContentConfig] = Field(
      default_factory=genai_types.GenerateContentConfig,
      description="The configuration for the model.",
  )

  max_allowed_invocations: int = Field(
      default=20,
      description="""Maximum number of invocations allowed by the simulated
interaction. This property allows us to stop a run-off conversation, where the
agent and the user simulator get into an never ending loop.
(Not recommended)If you don't want a limit, you can set the value to -1.
""",
  )
@experimental
class LlmBackedUserSimulator(UserSimulator):
  """User simulator that relies on an LLM to write the user's messages.

  Both interface methods are placeholders in this version and raise
  `NotImplementedError`.
  """

  # Concrete config class that the incoming config is validated against.
  config_type: ClassVar[type[LlmBackedUserSimulatorConfig]] = (
      LlmBackedUserSimulatorConfig
  )

  def __init__(self, *, config: BaseUserSimulatorConfig):
    """Initializes the simulator from a (keyword-only) config.

    Args:
      config: The user simulator config; the base class validates it against
        `LlmBackedUserSimulator.config_type`.
    """
    super().__init__(
        config=config,
        config_type=LlmBackedUserSimulator.config_type,
    )

  @override
  async def get_next_user_message(
      self,
      conversation_plan: str,
      events: list[Event],
  ) -> NextUserMessage:
    """Produces the next user message for the agent with help from a LLM.

    Args:
      conversation_plan: A plan that user simulation system needs to follow as
        it plays out the conversation.
      events: The unaltered conversation history between the user and the
        agent(s) under evaluation.

    Raises:
      NotImplementedError: Always; the method is not implemented yet.
    """
    raise NotImplementedError

  @override
  def get_simulation_evaluator(self) -> Evaluator:
    """Returns an Evaluator that judges whether the simulation was successful.

    Raises:
      NotImplementedError: Always; the method is not implemented yet.
    """
    raise NotImplementedError
+102
View File
@@ -0,0 +1,102 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
from abc import ABC
import enum
from typing import Optional
from google.genai import types as genai_types
from pydantic import alias_generators
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from pydantic import ValidationError
from ..events.event import Event
from ..utils.feature_decorator import experimental
from .common import EvalBaseModel
from .evaluator import Evaluator
class BaseUserSimulatorConfig(BaseModel):
  """Common base for every User Simulator configuration model.

  The model accepts camelCase aliases as well as the Python field names, and
  allows extra (undeclared) keys.
  """

  model_config = ConfigDict(
      extra="allow",
      populate_by_name=True,
      alias_generator=alias_generators.to_camel,
  )
class Status(enum.Enum):
  """Outcome reported by `get_next_user_message()`.

  Each member's value is the lowercase string form of its name.
  """

  SUCCESS = "success"
  TURN_LIMIT_REACHED = "turn_limit_reached"
  STOP_SIGNAL_DETECTED = "stop_signal_detected"
  NO_MESSAGE_GENERATED = "no_message_generated"
class NextUserMessage(EvalBaseModel):
  """Result of a single `get_next_user_message()` call.

  Carries the outcome `status` together with the generated `user_message`,
  which may be absent.
  """

  status: Status = Field(
      description="""The resulting status of `get_next_user_message()`.
The caller of `get_next_user_message()` should inspect this field to determine
if the user simulator was able to successfully generate a message or why it was
not able to do so."""
  )

  # Defaults to None so that a result with no message can be built without
  # passing `user_message=None` explicitly; `Optional` alone does not make a
  # pydantic field optional.
  user_message: Optional[genai_types.Content] = Field(
      default=None,
      description="""The next user message.""",
  )
@experimental
class UserSimulator(ABC):
  """Interface for components that play the user when talking to an Agent.

  Subclasses are expected to override `get_next_user_message()` and
  `get_simulation_evaluator()`; the versions here raise `NotImplementedError`.
  """

  def __init__(
      self,
      config: BaseUserSimulatorConfig,
      config_type: type[BaseUserSimulatorConfig],
  ):
    """Validates `config` as `config_type` and stores it on `self._config`.

    Args:
      config: The user simulator config supplied by the caller.
      config_type: The specific config class required by the class
        implementing this interface.

    Raises:
      ValueError: If `config` fails validation against `config_type`.
    """
    try:
      # Dump the incoming config to a plain dict, then re-validate it as the
      # specific config type needed by the implementing class.
      raw_config = config.model_dump()
      self._config = config_type.model_validate(raw_config)
    except ValidationError as err:
      raise ValueError(f"Expect config of type `{config_type}`.") from err

  async def get_next_user_message(
      self,
      conversation_plan: str,
      events: list[Event],
  ) -> NextUserMessage:
    """Returns the next user message to send to the agent.

    Args:
      conversation_plan: A plan that user simulation system needs to follow as
        it plays out the conversation.
      events: The unaltered conversation history between the user and the
        agent(s) under evaluation.

    Raises:
      NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError

  def get_simulation_evaluator(self) -> Evaluator:
    """Returns an instance of an Evaluator that evaluates if the simulation was successful or not.

    Raises:
      NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError