diff --git a/src/google/adk/cli/cli_eval.py b/src/google/adk/cli/cli_eval.py
index dcd6652c..4f285d61 100644
--- a/src/google/adk/cli/cli_eval.py
+++ b/src/google/adk/cli/cli_eval.py
@@ -47,6 +47,7 @@ from ..evaluation.eval_metrics import EvalMetricResult
 from ..evaluation.eval_metrics import EvalMetricResultPerInvocation
 from ..evaluation.eval_metrics import JudgeModelOptions
 from ..evaluation.eval_result import EvalCaseResult
+from ..evaluation.eval_sets_manager import EvalSetsManager
 from ..evaluation.evaluator import EvalStatus
 from ..evaluation.evaluator import Evaluator
 from ..sessions.base_session_service import BaseSessionService
@@ -436,3 +437,35 @@ def _get_evaluator(eval_metric: EvalMetric) -> Evaluator:
     return FinalResponseMatchV2Evaluator(eval_metric)
 
   raise ValueError(f"Unsupported eval metric: {eval_metric}")
+
+
+def get_eval_sets_manager(
+    eval_storage_uri: Optional[str], agents_dir: str
+) -> EvalSetsManager:
+  """Returns the EvalSetsManager for the requested eval storage.
+
+  Args:
+    eval_storage_uri: Optional evals storage URI. When non-empty, the eval
+      sets manager created from this URI is returned.
+    agents_dir: Directory passed to LocalEvalSetsManager when no
+      eval_storage_uri is given.
+
+  Returns:
+    The EvalSetsManager matching the requested storage.
+
+  Raises:
+    click.ClickException: If the eval dependencies cannot be imported.
+  """
+  try:
+    from ..evaluation.local_eval_sets_manager import LocalEvalSetsManager
+    from .utils import evals
+  except ModuleNotFoundError as mnf:
+    raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf
+
+  if eval_storage_uri:
+    gcs_eval_managers = evals.create_gcs_eval_managers_from_uri(
+        eval_storage_uri
+    )
+    return gcs_eval_managers.eval_sets_manager
+  else:
+    return LocalEvalSetsManager(agents_dir=agents_dir)
diff --git a/src/google/adk/cli/cli_tools_click.py b/src/google/adk/cli/cli_tools_click.py
index e76d903f..7115b5fc 100644
--- a/src/google/adk/cli/cli_tools_click.py
+++ b/src/google/adk/cli/cli_tools_click.py
@@ -18,6 +18,8 @@ import asyncio
 from contextlib import asynccontextmanager
 from datetime import datetime
 import functools
+import hashlib
+import json
 import logging
 import os
 from pathlib import Path
@@ -433,6 +435,28 @@ def cli_run(
   )
 
 
+def eval_options():
+  """Decorator to add common eval options to click commands."""
+
+  def decorator(func):
+    @click.option(
+        "--eval_storage_uri",
+        type=str,
+        help=(
+            "Optional. The evals storage URI to store agent evals,"
+            " supported URIs: gs://."
+        ),
+        default=None,
+    )
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+      return func(*args, **kwargs)
+
+    return wrapper
+
+  return decorator
+
+
 @main.command("eval", cls=HelpfulCommand)
 @click.argument(
     "agent_module_file_path",
@@ -449,15 +473,7 @@ def cli_run(
     default=False,
     help="Optional. Whether to print detailed results on console or not.",
 )
-@click.option(
-    "--eval_storage_uri",
-    type=str,
-    help=(
-        "Optional. The evals storage URI to store agent evals,"
-        " supported URIs: gs://."
-    ),
-    default=None,
-)
+@eval_options()
 def cli_eval(
     agent_module_file_path: str,
     eval_set_file_path_or_id: list[str],
@@ -675,6 +691,143 @@ def cli_eval(
       pretty_print_eval_result(eval_result)
 
 
+@main.group("eval_set")
+def eval_set():
+  """Manage Eval Sets."""
+  pass
+
+
+@eval_set.command("create", cls=HelpfulCommand)
+@click.argument(
+    "agent_module_file_path",
+    type=click.Path(
+        exists=True, dir_okay=True, file_okay=False, resolve_path=True
+    ),
+)
+@click.argument("eval_set_id", type=str, required=True)
+@eval_options()
+def cli_create_eval_set(
+    agent_module_file_path: str,
+    eval_set_id: str,
+    eval_storage_uri: Optional[str] = None,
+):
+  """Creates an empty EvalSet given the agent_module_file_path and eval_set_id."""
+  try:
+    from .cli_eval import get_eval_sets_manager
+  except ModuleNotFoundError as mnf:
+    raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf
+
+  app_name = os.path.basename(agent_module_file_path)
+  agents_dir = os.path.dirname(agent_module_file_path)
+  eval_sets_manager = get_eval_sets_manager(eval_storage_uri, agents_dir)
+
+  try:
+    eval_sets_manager.create_eval_set(
+        app_name=app_name, eval_set_id=eval_set_id
+    )
+    click.echo(f"Eval set '{eval_set_id}' created for app '{app_name}'.")
+  except ValueError as e:
+    raise click.ClickException(str(e)) from e
+
+
+@eval_set.command("add_eval_case", cls=HelpfulCommand)
+@click.argument(
+    "agent_module_file_path",
+    type=click.Path(
+        exists=True, dir_okay=True, file_okay=False, resolve_path=True
+    ),
+)
+@click.argument("eval_set_id", type=str, required=True)
+@click.option(
+    "--scenarios_file",
+    type=click.Path(
+        exists=True, dir_okay=False, file_okay=True, resolve_path=True
+    ),
+    help="A path to file containing JSON serialized ConversationScenarios.",
+    required=True,
+)
+@click.option(
+    "--session_input_file",
+    type=click.Path(
+        exists=True, dir_okay=False, file_okay=True, resolve_path=True
+    ),
+    help=(
+        "Optional. Path to session file containing SessionInput in JSON format."
+    ),
+    default=None,
+)
+@eval_options()
+def cli_add_eval_case(
+    agent_module_file_path: str,
+    eval_set_id: str,
+    scenarios_file: str,
+    eval_storage_uri: Optional[str] = None,
+    session_input_file: Optional[str] = None,
+):
+  """Adds eval cases to the given eval set.
+
+  There are several ways that an eval case can be created, for now this method
+  only supports adding one using a conversation scenarios file.
+
+  If an eval case for the generated id already exists, then we skip adding it.
+  """
+  try:
+    from ..evaluation.conversation_scenarios import ConversationScenarios
+    from ..evaluation.eval_case import EvalCase
+    from ..evaluation.eval_case import SessionInput
+    from .cli_eval import get_eval_sets_manager
+  except ModuleNotFoundError as mnf:
+    raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf
+
+  app_name = os.path.basename(agent_module_file_path)
+  agents_dir = os.path.dirname(agent_module_file_path)
+  eval_sets_manager = get_eval_sets_manager(eval_storage_uri, agents_dir)
+
+  try:
+    session_input = None
+    if session_input_file:
+      with open(session_input_file, "r", encoding="utf-8") as f:
+        session_input = SessionInput.model_validate_json(f.read())
+
+    with open(scenarios_file, "r", encoding="utf-8") as f:
+      conversation_scenarios = ConversationScenarios.model_validate_json(
+          f.read()
+      )
+
+    for scenario in conversation_scenarios.scenarios:
+      # Derive a stable id from the scenario content so that re-adding the same
+      # scenario is detected below and skipped instead of duplicated.
+      scenario_str = json.dumps(scenario.model_dump(), sort_keys=True)
+      eval_id = hashlib.sha256(scenario_str.encode("utf-8")).hexdigest()[:8]
+      eval_case = EvalCase(
+          eval_id=eval_id,
+          conversation_scenario=scenario,
+          session_input=session_input,
+          creation_timestamp=datetime.now().timestamp(),
+      )
+
+      if (
+          eval_sets_manager.get_eval_case(
+              app_name=app_name, eval_set_id=eval_set_id, eval_case_id=eval_id
+          )
+          is None
+      ):
+        eval_sets_manager.add_eval_case(
+            app_name=app_name, eval_set_id=eval_set_id, eval_case=eval_case
+        )
+        click.echo(
+            f"Eval case '{eval_case.eval_id}' added to eval set"
+            f" '{eval_set_id}'."
+        )
+      else:
+        click.echo(
+            f"Eval case '{eval_case.eval_id}' already exists in eval set"
+            f" '{eval_set_id}', skipped adding."
+        )
+  except Exception as e:
+    raise click.ClickException(f"Failed to add eval case(s): {e}") from e
+
+
 def web_options():
   """Decorator to add web UI options to click commands."""
 
diff --git a/tests/unittests/cli/utils/test_cli_eval.py b/tests/unittests/cli/utils/test_cli_eval.py
new file mode 100644
index 00000000..8ff33dd9
--- /dev/null
+++ b/tests/unittests/cli/utils/test_cli_eval.py
@@ -0,0 +1,51 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Unit tests for utilities in cli_eval."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from unittest import mock
+
+
+def test_get_eval_sets_manager_local(monkeypatch):
+  mock_local_manager = mock.MagicMock()
+  monkeypatch.setattr(
+      "google.adk.evaluation.local_eval_sets_manager.LocalEvalSetsManager",
+      lambda *a, **k: mock_local_manager,
+  )
+  from google.adk.cli.cli_eval import get_eval_sets_manager
+
+  manager = get_eval_sets_manager(eval_storage_uri=None, agents_dir="some/dir")
+  assert manager == mock_local_manager
+
+
+def test_get_eval_sets_manager_gcs(monkeypatch):
+  mock_gcs_manager = mock.MagicMock()
+  mock_create_gcs = mock.MagicMock()
+  mock_create_gcs.return_value = SimpleNamespace(
+      eval_sets_manager=mock_gcs_manager
+  )
+  monkeypatch.setattr(
+      "google.adk.cli.utils.evals.create_gcs_eval_managers_from_uri",
+      mock_create_gcs,
+  )
+  from google.adk.cli.cli_eval import get_eval_sets_manager
+
+  manager = get_eval_sets_manager(
+      eval_storage_uri="gs://bucket", agents_dir="some/dir"
+  )
+  assert manager == mock_gcs_manager
+  mock_create_gcs.assert_called_once_with("gs://bucket")
diff --git a/tests/unittests/cli/utils/test_cli_tools_click.py b/tests/unittests/cli/utils/test_cli_tools_click.py
index 138289ed..3bd02b39 100644
--- a/tests/unittests/cli/utils/test_cli_tools_click.py
+++ b/tests/unittests/cli/utils/test_cli_tools_click.py
@@ -18,6 +18,7 @@ from __future__ import annotations
 
 
 import builtins
+import json
 from pathlib import Path
 from types import SimpleNamespace
 from typing import Any
@@ -620,6 +621,177 @@ def test_cli_eval_with_eval_set_id(
   assert len(eval_set_results) == 2
 
 
+def test_cli_create_eval_set(tmp_path: Path):
+  app_name = "test_app"
+  eval_set_id = "test_eval_set"
+  agent_path = tmp_path / app_name
+  agent_path.mkdir()
+  (agent_path / "__init__.py").touch()
+
+  runner = CliRunner()
+  result = runner.invoke(
+      cli_tools_click.main,
+      ["eval_set", "create", str(agent_path), eval_set_id],
+  )
+
+  assert result.exit_code == 0
+  eval_set_file = agent_path / f"{eval_set_id}.evalset.json"
+  assert eval_set_file.exists()
+  with open(eval_set_file, "r") as f:
+    eval_set_data = json.load(f)
+  assert eval_set_data["eval_set_id"] == eval_set_id
+  assert eval_set_data["eval_cases"] == []
+
+
+def test_cli_add_eval_case_no_session(tmp_path: Path):
+  app_name = "test_app_add_1"
+  eval_set_id = "test_eval_set_add_1"
+  agent_path = tmp_path / app_name
+  agent_path.mkdir()
+  (agent_path / "__init__.py").touch()
+
+  scenarios_file = tmp_path / "scenarios1.json"
+  scenarios_file.write_text(
+      '{"scenarios": [{"starting_prompt": "hello", "conversation_plan":'
+      ' "world"}]}'
+  )
+
+  runner = CliRunner()
+  runner.invoke(
+      cli_tools_click.main,
+      ["eval_set", "create", str(agent_path), eval_set_id],
+      catch_exceptions=False,
+  )
+  result = runner.invoke(
+      cli_tools_click.main,
+      [
+          "eval_set",
+          "add_eval_case",
+          str(agent_path),
+          eval_set_id,
+          "--scenarios_file",
+          str(scenarios_file),
+      ],
+      catch_exceptions=False,
+  )
+
+  assert result.exit_code == 0
+  eval_set_file = agent_path / f"{eval_set_id}.evalset.json"
+  assert eval_set_file.exists()
+  with open(eval_set_file, "r") as f:
+    eval_set_data = json.load(f)
+  assert len(eval_set_data["eval_cases"]) == 1
+  eval_case = eval_set_data["eval_cases"][0]
+  assert eval_case["eval_id"] == "0a1a5048"
+  assert eval_case["conversation_scenario"]["starting_prompt"] == "hello"
+  assert "session_input" not in eval_case
+
+
+def test_cli_add_eval_case_with_session(tmp_path: Path):
+  app_name = "test_app_add_2"
+  eval_set_id = "test_eval_set_add_2"
+  agent_path = tmp_path / app_name
+  agent_path.mkdir()
+  (agent_path / "__init__.py").touch()
+
+  scenarios_file = tmp_path / "scenarios2.json"
+  scenarios_file.write_text(
+      '{"scenarios": [{"starting_prompt": "hello", "conversation_plan":'
+      ' "world"}]}'
+  )
+  session_file = tmp_path / "session2.json"
+  session_file.write_text(
+      '{"app_name": "test_app_add_2", "user_id": "test_user", "state": {}}'
+  )
+
+  runner = CliRunner()
+  runner.invoke(
+      cli_tools_click.main,
+      ["eval_set", "create", str(agent_path), eval_set_id],
+      catch_exceptions=False,
+  )
+  result = runner.invoke(
+      cli_tools_click.main,
+      [
+          "eval_set",
+          "add_eval_case",
+          str(agent_path),
+          eval_set_id,
+          "--scenarios_file",
+          str(scenarios_file),
+          "--session_input_file",
+          str(session_file),
+      ],
+      catch_exceptions=False,
+  )
+
+  assert result.exit_code == 0
+  eval_set_file = agent_path / f"{eval_set_id}.evalset.json"
+  assert eval_set_file.exists()
+  with open(eval_set_file, "r") as f:
+    eval_set_data = json.load(f)
+  assert len(eval_set_data["eval_cases"]) == 1
+  eval_case = eval_set_data["eval_cases"][0]
+  assert eval_case["eval_id"] == "0a1a5048"
+  assert eval_case["session_input"]["app_name"] == "test_app_add_2"
+
+
+def test_cli_add_eval_case_skip_existing(tmp_path: Path):
+  app_name = "test_app_add_3"
+  eval_set_id = "test_eval_set_add_3"
+  agent_path = tmp_path / app_name
+  agent_path.mkdir()
+  (agent_path / "__init__.py").touch()
+
+  scenarios_file = tmp_path / "scenarios3.json"
+  scenarios_file.write_text(
+      '{"scenarios": [{"starting_prompt": "hello", "conversation_plan":'
+      ' "world"}]}'
+  )
+
+  runner = CliRunner()
+  runner.invoke(
+      cli_tools_click.main,
+      ["eval_set", "create", str(agent_path), eval_set_id],
+      catch_exceptions=False,
+  )
+  result1 = runner.invoke(
+      cli_tools_click.main,
+      [
+          "eval_set",
+          "add_eval_case",
+          str(agent_path),
+          eval_set_id,
+          "--scenarios_file",
+          str(scenarios_file),
+      ],
+      catch_exceptions=False,
+  )
+  eval_set_file = agent_path / f"{eval_set_id}.evalset.json"
+  with open(eval_set_file, "r") as f:
+    eval_set_data1 = json.load(f)
+
+  result2 = runner.invoke(
+      cli_tools_click.main,
+      [
+          "eval_set",
+          "add_eval_case",
+          str(agent_path),
+          eval_set_id,
+          "--scenarios_file",
+          str(scenarios_file),
+      ],
+      catch_exceptions=False,
+  )
+  with open(eval_set_file, "r") as f:
+    eval_set_data2 = json.load(f)
+
+  assert result1.exit_code == 0
+  assert result2.exit_code == 0
+  assert len(eval_set_data1["eval_cases"]) == 1
+  assert len(eval_set_data2["eval_cases"]) == 1
+
+
 def test_cli_deploy_cloud_run_gcloud_arg_conflict(
     tmp_path: Path, monkeypatch: pytest.MonkeyPatch
 ) -> None: