test: Add evaluation for BigQuery tools

We should treat this as the first step towards building a robust eval story for BQ tools. PiperOrigin-RevId: 807247053
2026-07-09 18:19:28 -07:00 · 2025-09-15 08:16:33 -07:00
parent 7870480c63
commit 103e88e95f
5 changed files with 701 additions and 0 deletions
@@ -0,0 +1,67 @@
+# Instructions
+
+## Run Evaluation
+
+1. Set environment variables in your terminal:
+
+  ```shell
+  export GOOGLE_GENAI_USE_VERTEXAI=FALSE
+  export GOOGLE_API_KEY=<your_api_key>
+  export GOOGLE_CLOUD_PROJECT=<your_bigquery_project>
+  ```
+1. Change to the agent fixture directory:
+
+  ```shell
+  cd third_party/py/google/adk/tests/integration/fixture/bigquery_agent/
+  ```
+1. Customize the evaluation dataset for your environment by replacing the
+   `${GOOGLE_CLOUD_PROJECT}` placeholder with the real project set in your environment:
+
+  ```shell
+  sed -e "s:\${GOOGLE_CLOUD_PROJECT}:${GOOGLE_CLOUD_PROJECT}:g" simple.test.json -i
+  ```
+1. Run the following command as per https://google.github.io/adk-docs/evaluate/#3-adk-eval-run-evaluations-via-the-cli:
+
+  ```shell
+  adk eval . simple.test.json --config_file_path=test_config.json
+  ```
+
+  If it fails, re-run with the `--print_detailed_results` flag to see more
+  details on the turn-by-turn evaluation.
+
+## Generate Evaluation dataset
+
+1. Set environment variables in your terminal:
+
+  ```shell
+  export GOOGLE_GENAI_USE_VERTEXAI=FALSE
+  export GOOGLE_API_KEY=<your_api_key>
+  export GOOGLE_CLOUD_PROJECT=<your_bigquery_project>
+  ```
+1. Set up Google [Application Default Credentials](https://cloud.google.com/docs/authentication/provide-credentials-adc)
+   on your machine:
+
+  ```shell
+  gcloud auth application-default login
+  ```
+1. Change to the directory containing the agent folder:
+
+  ```shell
+  cd third_party/py/google/adk/tests/integration/fixture/
+  ```
+1. Run the following command to start the ADK web app:
+
+  ```shell
+  adk web
+  ```
+1. Open the ADK web UI in your browser http://127.0.0.1:8000/dev-ui/?app=bigquery_agent.
+1. Create an evaluation dataset by following [these steps](https://google.github.io/adk-docs/evaluate/#1-adk-web-run-evaluations-via-the-web-ui).
+   This generates the file `bigquery_agent/simple.evalset.json`.
+1. Note that this evaluation data is tied to the agent interaction in the
+   `GOOGLE_CLOUD_PROJECT` set in your environment. To normalize it by replacing
+   the real project set in your environment with a placeholder, run the
+   following command:
+
+  ```shell
+  sed -e "s:${GOOGLE_CLOUD_PROJECT}:\${GOOGLE_CLOUD_PROJECT}:g"  bigquery_agent/simple.evalset.json > bigquery_agent/simple.test.json
+  ```
@@ -0,0 +1,15 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import agent
@@ -0,0 +1,75 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import os
+
+from google.adk.agents.llm_agent import LlmAgent
+from google.adk.tools.bigquery.bigquery_credentials import BigQueryCredentialsConfig
+from google.adk.tools.bigquery.bigquery_toolset import BigQueryToolset
+from google.adk.tools.bigquery.config import BigQueryToolConfig
+from google.adk.tools.bigquery.config import WriteMode
+import google.auth
+
+# Check necessary environment variables
+if not (google_cloud_project_id := os.getenv("GOOGLE_CLOUD_PROJECT")):
+  raise ValueError(
+      "GOOGLE_CLOUD_PROJECT environment variable is not set. Please set it"
+      " to the GCP project ID where your BigQuery jobs would be run."
+  )
+
+# Define an appropriate application name
+BIGQUERY_AGENT_NAME = "adk_eval_bigquery_agent"
+
+
+# Define BigQuery tool config with write mode set to allowed. Note that this is
+# only to demonstrate the full capability of the BigQuery tools. In production
+# you may want to change to BLOCKED (default write mode, effectively makes the
+# tool read-only) or PROTECTED (only allows writes in the anonymous dataset of a
+# BigQuery session) write mode.
+tool_config = BigQueryToolConfig(
+    write_mode=WriteMode.BLOCKED,
+    application_name=BIGQUERY_AGENT_NAME,
+    compute_project_id=google_cloud_project_id,
+)
+
+# Initialize the tools to use the application default credentials.
+# https://cloud.google.com/docs/authentication/provide-credentials-adc
+application_default_credentials, _ = google.auth.default()
+credentials_config = BigQueryCredentialsConfig(
+    credentials=application_default_credentials
+)
+
+bigquery_toolset = BigQueryToolset(
+    credentials_config=credentials_config, bigquery_tool_config=tool_config
+)
+
+# The variable name `root_agent` determines what your root agent is for the
+# debug CLI
+root_agent = LlmAgent(
+    model="gemini-2.5-flash",
+    name=BIGQUERY_AGENT_NAME,
+    description=(
+        "Agent to answer questions about BigQuery data and models and execute"
+        " SQL queries."
+    ),
+    instruction=f"""\
+        You are a data science agent with access to several BigQuery tools.
+        Make use of those tools to answer the user's questions.
+
+        You must use the project id {google_cloud_project_id} for running SQL
+        queries and generating data insights
+    """,
+    tools=[bigquery_toolset],
+)
@@ -0,0 +1,6 @@
+{
+  "criteria": {
+    "tool_trajectory_avg_score": 0.7,
+    "response_match_score": 0.7
+  }
+}