diff --git a/src/google/adk/evaluation/conversation_scenarios.py b/src/google/adk/evaluation/conversation_scenarios.py
new file mode 100644
index 00000000..1ad7dd73
--- /dev/null
+++ b/src/google/adk/evaluation/conversation_scenarios.py
@@ -0,0 +1,63 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from typing import Union
+
+from google.genai import types as genai_types
+from pydantic import Field
+
+from .common import EvalBaseModel
+
+
+class ConversationScenario(EvalBaseModel):
+  """Scenario for a conversation between a simulated user and the Agent."""
+
+  starting_prompt: Union[str, genai_types.Content]
+  """Starting prompt for the conversation.
+
+  This prompt acts as the first user message that is given to the Agent. Any
+  subsequent user messages are obtained by the system that is simulating the
+  user.
+  """
+
+  conversation_plan: str
+  """A plan that the user simulation system needs to follow as it plays out the conversation.
+
+  Example:
+  For a Travel Agent that has tools that let it book a flight and car, a sample
+  starting prompt could be:
+
+  `I need to book a flight.`
+
+  A conversation plan could look like:
+
+  First, you want to book a one-way flight from SFO to LAX for next Tuesday.
+  You prefer a morning flight and your budget is under $150. If the agent finds
+  a valid flight, confirm the booking. Once confirmed, your next goal is to rent
+  a standard-size car for three days from the airport. Once both tasks are done,
+  your overall goal is complete.
+  """
+
+
+class ConversationScenarios(EvalBaseModel):
+  """A simple container for a list of `ConversationScenario` objects.
+
+  Mainly serves the purpose of helping with serialization and deserialization.
+  """
+
+  scenarios: list[ConversationScenario] = Field(
+      default_factory=list, description="""A list of ConversationScenario."""
+  )
diff --git a/src/google/adk/evaluation/eval_case.py b/src/google/adk/evaluation/eval_case.py
index 1f736a7f..5e27aa41 100644
--- a/src/google/adk/evaluation/eval_case.py
+++ b/src/google/adk/evaluation/eval_case.py
@@ -24,6 +24,7 @@ from typing_extensions import TypeAlias
 
 from .app_details import AppDetails
 from .common import EvalBaseModel
+from .conversation_scenarios import ConversationScenario
 from .eval_rubrics import Rubric
 
 
@@ -119,14 +120,29 @@ class SessionInput(EvalBaseModel):
   """The state of the session."""
 
 
+StaticConversation: TypeAlias = list[Invocation]
+"""A conversation where the user's query for each invocation is already specified."""
+
+
 class EvalCase(EvalBaseModel):
   """An eval case."""
 
   eval_id: str
   """Unique identifier for the evaluation case."""
 
-  conversation: list[Invocation]
-  """A conversation between the user and the Agent. The conversation can have any number of invocations."""
+  conversation: Optional[StaticConversation] = None
+  """A static conversation between the user and the Agent.
+
+  While creating an eval case you should specify either a `conversation` or a
+  `conversation_scenario`, but not both.
+  """
+
+  conversation_scenario: Optional[ConversationScenario] = None
+  """A conversation scenario that should be used by a UserSimulator.
+
+  While creating an eval case you should specify either a `conversation` or a
+  `conversation_scenario`, but not both.
+  """
 
   session_input: Optional[SessionInput] = None
   """Session input that will be passed on to the Agent during eval.
diff --git a/src/google/adk/evaluation/eval_config.py b/src/google/adk/evaluation/eval_config.py
index 74870dbc..e1b1fd6c 100644
--- a/src/google/adk/evaluation/eval_config.py
+++ b/src/google/adk/evaluation/eval_config.py
@@ -26,6 +26,7 @@ from pydantic import Field
 from ..evaluation.eval_metrics import EvalMetric
 from .eval_metrics import BaseCriterion
 from .eval_metrics import Threshold
+from .user_simulator import BaseUserSimulatorConfig
 
 logger = logging.getLogger("google_adk." + __name__)
 
@@ -70,6 +71,11 @@ the third one uses `LlmAsAJudgeCriterion`.
 """,
   )
 
+  user_simulator_config: Optional[BaseUserSimulatorConfig] = Field(
+      default=None,
+      description="""Config to be used by the user simulator.""",
+  )
+
 
 _DEFAULT_EVAL_CONFIG = EvalConfig(
     criteria={"tool_trajectory_avg_score": 1.0, "response_match_score": 0.8}
diff --git a/src/google/adk/evaluation/llm_backed_user_simulator.py b/src/google/adk/evaluation/llm_backed_user_simulator.py
new file mode 100644
index 00000000..cda86b93
--- /dev/null
+++ b/src/google/adk/evaluation/llm_backed_user_simulator.py
@@ -0,0 +1,88 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from typing import ClassVar
+from typing import Optional
+
+from google.genai import types as genai_types
+from pydantic import Field
+from typing_extensions import override
+
+from ..events.event import Event
+from ..utils.feature_decorator import experimental
+from .evaluator import Evaluator
+from .user_simulator import BaseUserSimulatorConfig
+from .user_simulator import NextUserMessage
+from .user_simulator import UserSimulator
+
+
+class LlmBackedUserSimulatorConfig(BaseUserSimulatorConfig):
+  """Contains configurations required by an LLM backed user simulator."""
+
+  model: str = Field(
+      default="gemini-2.5-flash",
+      description="The model to use for user simulation.",
+  )
+
+  # Use `default_factory`: a plain `default` of the class would make the field's
+  # default the GenerateContentConfig class itself rather than a config instance.
+  # NOTE(review): the base class uses `model_config` for its ConfigDict; verify.
+  model_config: Optional[genai_types.GenerateContentConfig] = Field(
+      default_factory=genai_types.GenerateContentConfig,
+      description="The configuration for the model.",
+  )
+
+  max_allowed_invocations: int = Field(
+      default=20,
+      description="""Maximum number of invocations allowed by the simulated
+interaction. This property allows us to stop a run-off conversation, where the
+agent and the user simulator get into an never ending loop.
+
+(Not recommended)If you don't want a limit, you can set the value to -1.
+      """,
+  )
+
+
+@experimental
+class LlmBackedUserSimulator(UserSimulator):
+  """A UserSimulator that uses a LLM to generate messages on behalf of the user."""
+
+  config_type: ClassVar[type[LlmBackedUserSimulatorConfig]] = (
+      LlmBackedUserSimulatorConfig
+  )
+
+  def __init__(self, *, config: BaseUserSimulatorConfig):
+    """Hands `config` to the base class to be validated as `config_type`."""
+    super().__init__(config, config_type=LlmBackedUserSimulator.config_type)
+
+  @override
+  async def get_next_user_message(
+      self, conversation_plan: str, events: list[Event]
+  ) -> NextUserMessage:
+    """Returns the next user message to send to the agent with help from a LLM.
+
+    Not implemented yet; this method always raises `NotImplementedError`.
+
+    Args:
+      conversation_plan: Plan the simulated user follows during the conversation.
+      events: Unaltered conversation history between the user and the agent(s).
+    """
+    raise NotImplementedError()
+
+  @override
+  def get_simulation_evaluator(self) -> Evaluator:
+    """Returns an Evaluator for simulation success; not implemented yet, always raises."""
+    raise NotImplementedError()
diff --git a/src/google/adk/evaluation/user_simulator.py b/src/google/adk/evaluation/user_simulator.py
new file mode 100644
index 00000000..39297805
--- /dev/null
+++ b/src/google/adk/evaluation/user_simulator.py
@@ -0,0 +1,102 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from abc import ABC
+import enum
+from typing import Optional
+
+from google.genai import types as genai_types
+from pydantic import alias_generators
+from pydantic import BaseModel
+from pydantic import ConfigDict
+from pydantic import Field
+from pydantic import ValidationError
+
+from ..events.event import Event
+from ..utils.feature_decorator import experimental
+from .common import EvalBaseModel
+from .evaluator import Evaluator
+
+
+class BaseUserSimulatorConfig(BaseModel):
+  """Base class for configurations pertaining to User Simulator."""
+
+  model_config = ConfigDict(
+      alias_generator=alias_generators.to_camel,
+      populate_by_name=True,
+      extra="allow",
+  )
+
+
+class Status(enum.Enum):
+  """The resulting status of get_next_user_message()."""
+
+  SUCCESS = "success"
+  TURN_LIMIT_REACHED = "turn_limit_reached"
+  STOP_SIGNAL_DETECTED = "stop_signal_detected"
+  NO_MESSAGE_GENERATED = "no_message_generated"
+
+
+class NextUserMessage(EvalBaseModel):
+  """Result of `get_next_user_message()`: a status and an optional message."""
+
+  status: Status = Field(
+      description="""The resulting status of `get_next_user_message()`.
+
+The caller of `get_next_user_message()` should inspect this field to determine
+if the user simulator was able to successfully generate a message or why it was
+not able to do so."""
+  )
+
+  user_message: Optional[genai_types.Content] = Field(
+      default=None, description="""The next user message."""
+  )
+
+
+@experimental
+class UserSimulator(ABC):
+  """A user simulator for the purposes of automating interaction with an Agent."""
+
+  def __init__(
+      self,
+      config: BaseUserSimulatorConfig,
+      config_type: type[BaseUserSimulatorConfig],
+  ):
+    """Converts `config` to the `config_type` needed by the implementing class.
+
+    Stores it in `self._config`; raises `ValueError` if validation fails.
+    """
+    try:
+      self._config = config_type.model_validate(config.model_dump())
+    except ValidationError as e:
+      raise ValueError(f"Expect config of type `{config_type}`.") from e
+
+  async def get_next_user_message(
+      self, conversation_plan: str, events: list[Event]
+  ) -> NextUserMessage:
+    """Returns the next user message to send to the agent.
+
+    The base implementation always raises `NotImplementedError`.
+
+    Args:
+      conversation_plan: Plan the simulated user follows during the conversation.
+      events: Unaltered conversation history between the user and the agent(s).
+    """
+    raise NotImplementedError()
+
+  def get_simulation_evaluator(self) -> Evaluator:
+    """Returns an instance of an Evaluator that evaluates if the simulation was successful or not."""
+    raise NotImplementedError()