feat: Add dry_run functionality to BigQuery execute_sql tool

PiperOrigin-RevId: 814854520
This commit is contained in:
Google Team Member
2025-10-03 15:36:25 -07:00
committed by Copybara-Service
parent 0b84d3eea7
commit 960eda3d1f
3 changed files with 295 additions and 25 deletions
+1 -1
View File
@@ -23,7 +23,7 @@ distributed via the `google.adk.tools.bigquery` module. These tools include:
1. `execute_sql`
Runs a SQL query in BigQuery.
Runs or dry-runs a SQL query in BigQuery.
1. `ask_data_insights`
+139 -12
View File
@@ -37,6 +37,7 @@ def execute_sql(
credentials: Credentials,
settings: BigQueryToolConfig,
tool_context: ToolContext,
dry_run: bool = False,
) -> dict:
"""Run a BigQuery or BigQuery ML SQL query in the project and return the result.
@@ -47,12 +48,17 @@ def execute_sql(
credentials (Credentials): The credentials to use for the request.
settings (BigQueryToolConfig): The settings for the tool.
tool_context (ToolContext): The context for the tool.
dry_run (bool, default False): If True, the query will not be executed.
Instead, the query will be validated and information about the query
will be returned. Defaults to False.
Returns:
dict: Dictionary representing the result of the query.
If the result contains the key "result_is_likely_truncated" with
value True, it means that there may be additional rows matching the
query not returned in the result.
dict: If `dry_run` is False, dictionary representing the result of the
query. If the result contains the key "result_is_likely_truncated"
with value True, it means that there may be additional rows matching
the query not returned in the result.
If `dry_run` is True, dictionary with "dry_run_info" field
containing query information returned by BigQuery.
Examples:
Fetch data or insights from a table:
@@ -77,6 +83,39 @@ def execute_sql(
}
]
}
Validate a query and estimate costs without executing it:
>>> execute_sql(
... "my_project",
... "SELECT island FROM "
... "bigquery-public-data.ml_datasets.penguins",
... dry_run=True
... )
{
"status": "SUCCESS",
"dry_run_info": {
"configuration": {
"dryRun": True,
"jobType": "QUERY",
"query": {
"destinationTable": {
"datasetId": "_...",
"projectId": "my_project",
"tableId": "anon..."
},
"priority": "INTERACTIVE",
"query": "SELECT island FROM bigquery-public-data.ml_datasets.penguins",
"useLegacySql": False,
"writeDisposition": "WRITE_TRUNCATE"
}
},
"jobReference": {
"location": "US",
"projectId": "my_project"
}
}
}
"""
try:
# Validate compute project if applicable
@@ -167,6 +206,18 @@ def execute_sql(
}
# Finally execute the query and fetch the result
if dry_run:
job_config_kwargs = {"dry_run": True}
if bq_connection_properties:
job_config_kwargs["connection_properties"] = bq_connection_properties
job_config = bigquery.QueryJobConfig(**job_config_kwargs)
dry_run_job = bq_client.query(
query,
project=project_id,
job_config=job_config,
)
return {"status": "SUCCESS", "dry_run_info": dry_run_job.to_api_repr()}
job_config = (
bigquery.QueryJobConfig(connection_properties=bq_connection_properties)
if bq_connection_properties
@@ -214,12 +265,17 @@ def _execute_sql_write_mode(*args, **kwargs) -> dict:
credentials (Credentials): The credentials to use for the request.
settings (BigQueryToolConfig): The settings for the tool.
tool_context (ToolContext): The context for the tool.
dry_run (bool, default False): If True, the query will not be executed.
Instead, the query will be validated and information about the query
will be returned. Defaults to False.
Returns:
dict: Dictionary representing the result of the query.
If the result contains the key "result_is_likely_truncated" with
value True, it means that there may be additional rows matching the
query not returned in the result.
dict: If `dry_run` is False, dictionary representing the result of the
query. If the result contains the key "result_is_likely_truncated"
with value True, it means that there may be additional rows matching
the query not returned in the result.
If `dry_run` is True, dictionary with "dry_run_info" field
containing query information returned by BigQuery.
Examples:
Fetch data or insights from a table:
@@ -245,6 +301,39 @@ def _execute_sql_write_mode(*args, **kwargs) -> dict:
]
}
Validate a query and estimate costs without executing it:
>>> execute_sql(
... "my_project",
... "SELECT island FROM "
... "bigquery-public-data.ml_datasets.penguins",
... dry_run=True
... )
{
"status": "SUCCESS",
"dry_run_info": {
"configuration": {
"dryRun": True,
"jobType": "QUERY",
"query": {
"destinationTable": {
"datasetId": "_...",
"projectId": "my_project",
"tableId": "anon..."
},
"priority": "INTERACTIVE",
"query": "SELECT island FROM bigquery-public-data.ml_datasets.penguins",
"useLegacySql": False,
"writeDisposition": "WRITE_TRUNCATE"
}
},
"jobReference": {
"location": "US",
"projectId": "my_project"
}
}
}
Create a table with schema prescribed:
>>> execute_sql("my_project",
@@ -396,12 +485,17 @@ def _execute_sql_protected_write_mode(*args, **kwargs) -> dict:
credentials (Credentials): The credentials to use for the request.
settings (BigQueryToolConfig): The settings for the tool.
tool_context (ToolContext): The context for the tool.
dry_run (bool, default False): If True, the query will not be executed.
Instead, the query will be validated and information about the query
will be returned. Defaults to False.
Returns:
dict: Dictionary representing the result of the query.
If the result contains the key "result_is_likely_truncated" with
value True, it means that there may be additional rows matching the
query not returned in the result.
dict: If `dry_run` is False, dictionary representing the result of the
query. If the result contains the key "result_is_likely_truncated"
with value True, it means that there may be additional rows matching
the query not returned in the result.
If `dry_run` is True, dictionary with "dry_run_info" field
containing query information returned by BigQuery.
Examples:
Fetch data or insights from a table:
@@ -427,6 +521,39 @@ def _execute_sql_protected_write_mode(*args, **kwargs) -> dict:
]
}
Validate a query and estimate costs without executing it:
>>> execute_sql(
... "my_project",
... "SELECT island FROM "
... "bigquery-public-data.ml_datasets.penguins",
... dry_run=True
... )
{
"status": "SUCCESS",
"dry_run_info": {
"configuration": {
"dryRun": True,
"jobType": "QUERY",
"query": {
"destinationTable": {
"datasetId": "_...",
"projectId": "my_project",
"tableId": "anon..."
},
"priority": "INTERACTIVE",
"query": "SELECT island FROM bigquery-public-data.ml_datasets.penguins",
"useLegacySql": False,
"writeDisposition": "WRITE_TRUNCATE"
}
},
"jobReference": {
"location": "US",
"projectId": "my_project"
}
}
}
Create a temporary table with schema prescribed:
>>> execute_sql("my_project",
@@ -95,12 +95,17 @@ async def test_execute_sql_declaration_read_only(tool_settings):
credentials (Credentials): The credentials to use for the request.
settings (BigQueryToolConfig): The settings for the tool.
tool_context (ToolContext): The context for the tool.
dry_run (bool, default False): If True, the query will not be executed.
Instead, the query will be validated and information about the query
will be returned. Defaults to False.
Returns:
dict: Dictionary representing the result of the query.
If the result contains the key "result_is_likely_truncated" with
value True, it means that there may be additional rows matching the
query not returned in the result.
dict: If `dry_run` is False, dictionary representing the result of the
query. If the result contains the key "result_is_likely_truncated"
with value True, it means that there may be additional rows matching
the query not returned in the result.
If `dry_run` is True, dictionary with "dry_run_info" field
containing query information returned by BigQuery.
Examples:
Fetch data or insights from a table:
@@ -124,6 +129,39 @@ async def test_execute_sql_declaration_read_only(tool_settings):
"population": 52
}
]
}
Validate a query and estimate costs without executing it:
>>> execute_sql(
... "my_project",
... "SELECT island FROM "
... "bigquery-public-data.ml_datasets.penguins",
... dry_run=True
... )
{
"status": "SUCCESS",
"dry_run_info": {
"configuration": {
"dryRun": True,
"jobType": "QUERY",
"query": {
"destinationTable": {
"datasetId": "_...",
"projectId": "my_project",
"tableId": "anon..."
},
"priority": "INTERACTIVE",
"query": "SELECT island FROM bigquery-public-data.ml_datasets.penguins",
"useLegacySql": False,
"writeDisposition": "WRITE_TRUNCATE"
}
},
"jobReference": {
"location": "US",
"projectId": "my_project"
}
}
}""")
@@ -156,12 +194,17 @@ async def test_execute_sql_declaration_write(tool_settings):
credentials (Credentials): The credentials to use for the request.
settings (BigQueryToolConfig): The settings for the tool.
tool_context (ToolContext): The context for the tool.
dry_run (bool, default False): If True, the query will not be executed.
Instead, the query will be validated and information about the query
will be returned. Defaults to False.
Returns:
dict: Dictionary representing the result of the query.
If the result contains the key "result_is_likely_truncated" with
value True, it means that there may be additional rows matching the
query not returned in the result.
dict: If `dry_run` is False, dictionary representing the result of the
query. If the result contains the key "result_is_likely_truncated"
with value True, it means that there may be additional rows matching
the query not returned in the result.
If `dry_run` is True, dictionary with "dry_run_info" field
containing query information returned by BigQuery.
Examples:
Fetch data or insights from a table:
@@ -187,6 +230,39 @@ async def test_execute_sql_declaration_write(tool_settings):
]
}
Validate a query and estimate costs without executing it:
>>> execute_sql(
... "my_project",
... "SELECT island FROM "
... "bigquery-public-data.ml_datasets.penguins",
... dry_run=True
... )
{
"status": "SUCCESS",
"dry_run_info": {
"configuration": {
"dryRun": True,
"jobType": "QUERY",
"query": {
"destinationTable": {
"datasetId": "_...",
"projectId": "my_project",
"tableId": "anon..."
},
"priority": "INTERACTIVE",
"query": "SELECT island FROM bigquery-public-data.ml_datasets.penguins",
"useLegacySql": False,
"writeDisposition": "WRITE_TRUNCATE"
}
},
"jobReference": {
"location": "US",
"projectId": "my_project"
}
}
}
Create a table with schema prescribed:
>>> execute_sql("my_project",
@@ -355,12 +431,17 @@ async def test_execute_sql_declaration_protected_write(tool_settings):
credentials (Credentials): The credentials to use for the request.
settings (BigQueryToolConfig): The settings for the tool.
tool_context (ToolContext): The context for the tool.
dry_run (bool, default False): If True, the query will not be executed.
Instead, the query will be validated and information about the query
will be returned. Defaults to False.
Returns:
dict: Dictionary representing the result of the query.
If the result contains the key "result_is_likely_truncated" with
value True, it means that there may be additional rows matching the
query not returned in the result.
dict: If `dry_run` is False, dictionary representing the result of the
query. If the result contains the key "result_is_likely_truncated"
with value True, it means that there may be additional rows matching
the query not returned in the result.
If `dry_run` is True, dictionary with "dry_run_info" field
containing query information returned by BigQuery.
Examples:
Fetch data or insights from a table:
@@ -386,6 +467,39 @@ async def test_execute_sql_declaration_protected_write(tool_settings):
]
}
Validate a query and estimate costs without executing it:
>>> execute_sql(
... "my_project",
... "SELECT island FROM "
... "bigquery-public-data.ml_datasets.penguins",
... dry_run=True
... )
{
"status": "SUCCESS",
"dry_run_info": {
"configuration": {
"dryRun": True,
"jobType": "QUERY",
"query": {
"destinationTable": {
"datasetId": "_...",
"projectId": "my_project",
"tableId": "anon..."
},
"priority": "INTERACTIVE",
"query": "SELECT island FROM bigquery-public-data.ml_datasets.penguins",
"useLegacySql": False,
"writeDisposition": "WRITE_TRUNCATE"
}
},
"jobReference": {
"location": "US",
"projectId": "my_project"
}
}
}
Create a temporary table with schema prescribed:
>>> execute_sql("my_project",
@@ -798,6 +912,35 @@ def test_execute_sql_non_select_stmt_write_protected_persistent_target(
}
def test_execute_sql_dry_run_true():
  """Verify execute_sql with dry_run=True validates the query without running it.

  The BigQuery client is mocked so that `Client.query` returns a job whose
  `to_api_repr()` yields a canned payload. The tool must:
    * submit the job with a job config whose `dry_run` flag is set,
    * surface the job's API representation under the "dry_run_info" key,
    * never call `query_and_wait` (i.e. never actually execute the query).
  """
  test_project = "my_project"
  test_query = "SELECT 123 AS num"
  mock_credentials = mock.create_autospec(Credentials, instance=True)
  settings = BigQueryToolConfig(write_mode=WriteMode.ALLOWED)
  context = mock.create_autospec(ToolContext, instance=True)
  expected_job_repr = {
      "configuration": {"dryRun": True, "query": {"query": test_query}},
      "jobReference": {"projectId": test_project, "location": "US"},
  }
  with mock.patch("google.cloud.bigquery.Client", autospec=False) as client_cls:
    mock_client = client_cls.return_value
    mock_job = mock.create_autospec(bigquery.QueryJob)
    mock_job.to_api_repr.return_value = expected_job_repr
    mock_client.query.return_value = mock_job
    result = execute_sql(
        test_project,
        test_query,
        mock_credentials,
        settings,
        context,
        dry_run=True,
    )
  # The dry-run payload is passed through verbatim under "dry_run_info".
  assert result == {"status": "SUCCESS", "dry_run_info": expected_job_repr}
  # Exactly one job submission, flagged as a dry run; no real execution.
  mock_client.query.assert_called_once()
  _, call_kwargs = mock_client.query.call_args
  assert call_kwargs["job_config"].dry_run is True
  mock_client.query_and_wait.assert_not_called()
@pytest.mark.parametrize(
("write_mode",),
[