feat: Update EvalConfig and EvalMetric data models to support custom metrics

Co-authored-by: Joseph Pagadora <jcpagadora@google.com>
PiperOrigin-RevId: 855517478
This commit is contained in:
Joseph Pagadora
2026-01-12 21:36:44 -08:00
committed by Copybara-Service
parent 905604faac
commit 6d2f33a59c
3 changed files with 88 additions and 0 deletions
+46
View File
@@ -23,7 +23,9 @@ from pydantic import alias_generators
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from pydantic import model_validator
from ..agents.common_configs import CodeConfig
from ..evaluation.eval_metrics import EvalMetric
from .eval_metrics import BaseCriterion
from .eval_metrics import Threshold
@@ -72,11 +74,46 @@ the third one uses `LlmAsAJudgeCriterion`.
""",
)
custom_metrics: Optional[dict[str, CodeConfig]] = Field(
default=None,
description="""A dictionary mapping custom metric names to CodeConfig
objects, which specify the path to the function for each custom metric.
If a metric name in `criteria` is also present in `custom_metrics`, the
corresponding `CodeConfig`'s `name` field will be used to locate the custom
metric implementation. The `name` field should contain the fully qualified
path to the custom metric function, e.g., `my.custom.metrics.metric_function`.
Example:
{
"criteria": {
"my_custom_metric": 0.5
},
"custom_metrics": {
"my_custom_metric": {
"name": "path.to.my.custom.metric.function"
}
}
}
""",
)
user_simulator_config: Optional[BaseUserSimulatorConfig] = Field(
default=None,
description="Config to be used by the user simulator.",
)
@model_validator(mode="after")
def check_custom_metrics_code_config_args(self) -> "EvalConfig":
  """Reject custom metrics whose CodeConfig sets the `args` field.

  Runs after field validation. Entries of `custom_metrics` are inspected in
  dict order and the first one with a truthy `args` aborts validation.

  Returns:
    This instance, when `custom_metrics` is unset/empty or no entry has
    truthy `args`.

  Raises:
    ValueError: If any custom metric's CodeConfig has truthy `args`.
  """
  # Nothing to check when no custom metrics were configured.
  if not self.custom_metrics:
    return self

  for metric_name, code_config in self.custom_metrics.items():
    if not code_config.args:
      continue
    raise ValueError(
        f"args field in CodeConfig for custom metric '{metric_name}' is"
        " not supported."
    )
  return self
_DEFAULT_EVAL_CONFIG = EvalConfig(
criteria={"tool_trajectory_avg_score": 1.0, "response_match_score": 0.8}
@@ -106,12 +143,20 @@ def get_eval_metrics_from_config(eval_config: EvalConfig) -> list[EvalMetric]:
eval_metric_list = []
if eval_config.criteria:
for metric_name, criterion in eval_config.criteria.items():
custom_function_path = None
if (
eval_config.custom_metrics
and metric_name in eval_config.custom_metrics
):
custom_function_path = eval_config.custom_metrics[metric_name].name
if isinstance(criterion, float):
eval_metric_list.append(
EvalMetric(
metric_name=metric_name,
threshold=criterion,
criterion=BaseCriterion(threshold=criterion),
custom_function_path=custom_function_path,
)
)
elif isinstance(criterion, BaseCriterion):
@@ -120,6 +165,7 @@ def get_eval_metrics_from_config(eval_config: EvalConfig) -> list[EvalMetric]:
metric_name=metric_name,
threshold=criterion.threshold,
criterion=criterion,
custom_function_path=custom_function_path,
)
)
else:
@@ -279,6 +279,11 @@ class EvalMetric(EvalBaseModel):
default=None, description="""Evaluation criterion used by the metric."""
)
custom_function_path: Optional[str] = Field(
default=None,
description="""Path to custom function, if this is a custom metric.""",
)
class EvalMetricResultDetails(EvalBaseModel):
rubric_scores: Optional[list[RubricScore]] = Field(
@@ -20,6 +20,7 @@ from google.adk.evaluation.eval_config import get_eval_metrics_from_config
from google.adk.evaluation.eval_config import get_evaluation_criteria_or_default
from google.adk.evaluation.eval_rubrics import Rubric
from google.adk.evaluation.eval_rubrics import RubricContent
import pytest
def test_get_evaluation_criteria_or_default_returns_default():
@@ -99,6 +100,42 @@ def test_get_eval_metrics_from_config():
assert eval_metrics[3].criterion.rubrics[0] == rubric_1
def test_get_eval_metrics_from_config_with_custom_metrics():
  """Custom metric paths are carried onto the resulting EvalMetric objects.

  Covers both criterion forms: a bare float threshold and a dict with a
  `threshold` key.
  """
  criteria = {
      "custom_metric_1": 1.0,
      "custom_metric_2": {
          "threshold": 0.5,
      },
  }
  custom_metrics = {
      "custom_metric_1": {"name": "path/to/custom/metric_1"},
      "custom_metric_2": {"name": "path/to/custom/metric_2"},
  }

  eval_metrics = get_eval_metrics_from_config(
      EvalConfig(criteria=criteria, custom_metrics=custom_metrics)
  )

  assert len(eval_metrics) == 2

  # (metric name, threshold, custom function path), in criteria order.
  expected = [
      ("custom_metric_1", 1.0, "path/to/custom/metric_1"),
      ("custom_metric_2", 0.5, "path/to/custom/metric_2"),
  ]
  for index, (name, threshold, function_path) in enumerate(expected):
    metric = eval_metrics[index]
    assert metric.metric_name == name
    assert metric.threshold == threshold
    assert metric.criterion.threshold == threshold
    assert metric.custom_function_path == function_path
def test_custom_metric_code_config_with_args_raises_error():
  """EvalConfig construction fails when a custom metric's CodeConfig has args."""
  with pytest.raises(ValueError):
    # Construction itself is expected to raise, so the result is not bound
    # to a name (it could never be used).
    EvalConfig(
        criteria={"custom_metric": 1.0},
        custom_metrics={
            "custom_metric": {"name": "name", "args": [{"value": 1}]}
        },
    )
def test_get_eval_metrics_from_config_empty_criteria():
eval_config = EvalConfig(criteria={})
eval_metrics = get_eval_metrics_from_config(eval_config)