From 960eda3d1f2f46dc93a365eb3de03dc3483fe9bb Mon Sep 17 00:00:00 2001 From: Google Team Member Date: Fri, 3 Oct 2025 15:36:25 -0700 Subject: [PATCH] feat: Add `dry_run` functionality to BigQuery `execute_sql` tool PiperOrigin-RevId: 814854520 --- contributing/samples/bigquery/README.md | 2 +- src/google/adk/tools/bigquery/query_tool.py | 151 ++++++++++++++-- .../bigquery/test_bigquery_query_tool.py | 167 ++++++++++++++++-- 3 files changed, 295 insertions(+), 25 deletions(-) diff --git a/contributing/samples/bigquery/README.md b/contributing/samples/bigquery/README.md index ea6c70a2..3185c463 100644 --- a/contributing/samples/bigquery/README.md +++ b/contributing/samples/bigquery/README.md @@ -23,7 +23,7 @@ distributed via the `google.adk.tools.bigquery` module. These tools include: 1. `execute_sql` - Runs a SQL query in BigQuery. + Runs or dry-runs a SQL query in BigQuery. 1. `ask_data_insights` diff --git a/src/google/adk/tools/bigquery/query_tool.py b/src/google/adk/tools/bigquery/query_tool.py index 03be2866..e2b1b256 100644 --- a/src/google/adk/tools/bigquery/query_tool.py +++ b/src/google/adk/tools/bigquery/query_tool.py @@ -37,6 +37,7 @@ def execute_sql( credentials: Credentials, settings: BigQueryToolConfig, tool_context: ToolContext, + dry_run: bool = False, ) -> dict: """Run a BigQuery or BigQuery ML SQL query in the project and return the result. @@ -47,12 +48,17 @@ def execute_sql( credentials (Credentials): The credentials to use for the request. settings (BigQueryToolConfig): The settings for the tool. tool_context (ToolContext): The context for the tool. + dry_run (bool, default False): If True, the query will not be executed. + Instead, the query will be validated and information about the query + will be returned. Defaults to False. Returns: - dict: Dictionary representing the result of the query. - If the result contains the key "result_is_likely_truncated" with - value True, it means that there may be additional rows matching the - query not returned in the result. + dict: If `dry_run` is False, dictionary representing the result of the + query. If the result contains the key "result_is_likely_truncated" + with value True, it means that there may be additional rows matching + the query not returned in the result. + If `dry_run` is True, dictionary with "dry_run_info" field + containing query information returned by BigQuery. Examples: Fetch data or insights from a table: @@ -77,6 +83,39 @@ def execute_sql( } ] } + + Validate a query and estimate costs without executing it: + + >>> execute_sql( + ... "my_project", + ... "SELECT island FROM " + ... "bigquery-public-data.ml_datasets.penguins", + ... dry_run=True + ... ) + { + "status": "SUCCESS", + "dry_run_info": { + "configuration": { + "dryRun": True, + "jobType": "QUERY", + "query": { + "destinationTable": { + "datasetId": "_...", + "projectId": "my_project", + "tableId": "anon..." + }, + "priority": "INTERACTIVE", + "query": "SELECT island FROM bigquery-public-data.ml_datasets.penguins", + "useLegacySql": False, + "writeDisposition": "WRITE_TRUNCATE" + } + }, + "jobReference": { + "location": "US", + "projectId": "my_project" + } + } + } """ try: # Validate compute project if applicable @@ -167,6 +206,18 @@ def execute_sql( } # Finally execute the query and fetch the result + if dry_run: + job_config_kwargs = {"dry_run": True} + if bq_connection_properties: + job_config_kwargs["connection_properties"] = bq_connection_properties + job_config = bigquery.QueryJobConfig(**job_config_kwargs) + dry_run_job = bq_client.query( + query, + project=project_id, + job_config=job_config, + ) + return {"status": "SUCCESS", "dry_run_info": dry_run_job.to_api_repr()} + job_config = ( bigquery.QueryJobConfig(connection_properties=bq_connection_properties) if bq_connection_properties @@ -214,12 +265,17 @@ def _execute_sql_write_mode(*args, **kwargs) -> dict: credentials (Credentials): The credentials to use for the request. settings (BigQueryToolConfig): The settings for the tool. tool_context (ToolContext): The context for the tool. + dry_run (bool, default False): If True, the query will not be executed. + Instead, the query will be validated and information about the query + will be returned. Defaults to False. Returns: - dict: Dictionary representing the result of the query. - If the result contains the key "result_is_likely_truncated" with - value True, it means that there may be additional rows matching the - query not returned in the result. + dict: If `dry_run` is False, dictionary representing the result of the + query. If the result contains the key "result_is_likely_truncated" + with value True, it means that there may be additional rows matching + the query not returned in the result. + If `dry_run` is True, dictionary with "dry_run_info" field + containing query information returned by BigQuery. Examples: Fetch data or insights from a table: @@ -245,6 +301,39 @@ def _execute_sql_write_mode(*args, **kwargs) -> dict: ] } + Validate a query and estimate costs without executing it: + + >>> execute_sql( + ... "my_project", + ... "SELECT island FROM " + ... "bigquery-public-data.ml_datasets.penguins", + ... dry_run=True + ... ) + { + "status": "SUCCESS", + "dry_run_info": { + "configuration": { + "dryRun": True, + "jobType": "QUERY", + "query": { + "destinationTable": { + "datasetId": "_...", + "projectId": "my_project", + "tableId": "anon..." + }, + "priority": "INTERACTIVE", + "query": "SELECT island FROM bigquery-public-data.ml_datasets.penguins", + "useLegacySql": False, + "writeDisposition": "WRITE_TRUNCATE" + } + }, + "jobReference": { + "location": "US", + "projectId": "my_project" + } + } + } + Create a table with schema prescribed: >>> execute_sql("my_project", @@ -396,12 +485,17 @@ def _execute_sql_protected_write_mode(*args, **kwargs) -> dict: credentials (Credentials): The credentials to use for the request. settings (BigQueryToolConfig): The settings for the tool. tool_context (ToolContext): The context for the tool. + dry_run (bool, default False): If True, the query will not be executed. + Instead, the query will be validated and information about the query + will be returned. Defaults to False. Returns: - dict: Dictionary representing the result of the query. - If the result contains the key "result_is_likely_truncated" with - value True, it means that there may be additional rows matching the - query not returned in the result. + dict: If `dry_run` is False, dictionary representing the result of the + query. If the result contains the key "result_is_likely_truncated" + with value True, it means that there may be additional rows matching + the query not returned in the result. + If `dry_run` is True, dictionary with "dry_run_info" field + containing query information returned by BigQuery. Examples: Fetch data or insights from a table: @@ -427,6 +521,39 @@ def _execute_sql_protected_write_mode(*args, **kwargs) -> dict: ] } + Validate a query and estimate costs without executing it: + + >>> execute_sql( + ... "my_project", + ... "SELECT island FROM " + ... "bigquery-public-data.ml_datasets.penguins", + ... dry_run=True + ... ) + { + "status": "SUCCESS", + "dry_run_info": { + "configuration": { + "dryRun": True, + "jobType": "QUERY", + "query": { + "destinationTable": { + "datasetId": "_...", + "projectId": "my_project", + "tableId": "anon..." + }, + "priority": "INTERACTIVE", + "query": "SELECT island FROM bigquery-public-data.ml_datasets.penguins", + "useLegacySql": False, + "writeDisposition": "WRITE_TRUNCATE" + } + }, + "jobReference": { + "location": "US", + "projectId": "my_project" + } + } + } + Create a temporary table with schema prescribed: >>> execute_sql("my_project", diff --git a/tests/unittests/tools/bigquery/test_bigquery_query_tool.py b/tests/unittests/tools/bigquery/test_bigquery_query_tool.py index 95abbe49..8e0d19cf 100644 --- a/tests/unittests/tools/bigquery/test_bigquery_query_tool.py +++ b/tests/unittests/tools/bigquery/test_bigquery_query_tool.py @@ -95,12 +95,17 @@ async def test_execute_sql_declaration_read_only(tool_settings): credentials (Credentials): The credentials to use for the request. settings (BigQueryToolConfig): The settings for the tool. tool_context (ToolContext): The context for the tool. + dry_run (bool, default False): If True, the query will not be executed. + Instead, the query will be validated and information about the query + will be returned. Defaults to False. Returns: - dict: Dictionary representing the result of the query. - If the result contains the key "result_is_likely_truncated" with - value True, it means that there may be additional rows matching the - query not returned in the result. + dict: If `dry_run` is False, dictionary representing the result of the + query. If the result contains the key "result_is_likely_truncated" + with value True, it means that there may be additional rows matching + the query not returned in the result. + If `dry_run` is True, dictionary with "dry_run_info" field + containing query information returned by BigQuery. Examples: Fetch data or insights from a table: @@ -124,6 +129,39 @@ async def test_execute_sql_declaration_read_only(tool_settings): "population": 52 } ] + } + + Validate a query and estimate costs without executing it: + + >>> execute_sql( + ... "my_project", + ... "SELECT island FROM " + ... "bigquery-public-data.ml_datasets.penguins", + ... dry_run=True + ... ) + { + "status": "SUCCESS", + "dry_run_info": { + "configuration": { + "dryRun": True, + "jobType": "QUERY", + "query": { + "destinationTable": { + "datasetId": "_...", + "projectId": "my_project", + "tableId": "anon..." + }, + "priority": "INTERACTIVE", + "query": "SELECT island FROM bigquery-public-data.ml_datasets.penguins", + "useLegacySql": False, + "writeDisposition": "WRITE_TRUNCATE" + } + }, + "jobReference": { + "location": "US", + "projectId": "my_project" + } + } }""") @@ -156,12 +194,17 @@ async def test_execute_sql_declaration_write(tool_settings): credentials (Credentials): The credentials to use for the request. settings (BigQueryToolConfig): The settings for the tool. tool_context (ToolContext): The context for the tool. + dry_run (bool, default False): If True, the query will not be executed. + Instead, the query will be validated and information about the query + will be returned. Defaults to False. Returns: - dict: Dictionary representing the result of the query. - If the result contains the key "result_is_likely_truncated" with - value True, it means that there may be additional rows matching the - query not returned in the result. + dict: If `dry_run` is False, dictionary representing the result of the + query. If the result contains the key "result_is_likely_truncated" + with value True, it means that there may be additional rows matching + the query not returned in the result. + If `dry_run` is True, dictionary with "dry_run_info" field + containing query information returned by BigQuery. Examples: Fetch data or insights from a table: @@ -187,6 +230,39 @@ async def test_execute_sql_declaration_write(tool_settings): ] } + Validate a query and estimate costs without executing it: + + >>> execute_sql( + ... "my_project", + ... "SELECT island FROM " + ... "bigquery-public-data.ml_datasets.penguins", + ... dry_run=True + ... ) + { + "status": "SUCCESS", + "dry_run_info": { + "configuration": { + "dryRun": True, + "jobType": "QUERY", + "query": { + "destinationTable": { + "datasetId": "_...", + "projectId": "my_project", + "tableId": "anon..." + }, + "priority": "INTERACTIVE", + "query": "SELECT island FROM bigquery-public-data.ml_datasets.penguins", + "useLegacySql": False, + "writeDisposition": "WRITE_TRUNCATE" + } + }, + "jobReference": { + "location": "US", + "projectId": "my_project" + } + } + } + Create a table with schema prescribed: >>> execute_sql("my_project", @@ -355,12 +431,17 @@ async def test_execute_sql_declaration_protected_write(tool_settings): credentials (Credentials): The credentials to use for the request. settings (BigQueryToolConfig): The settings for the tool. tool_context (ToolContext): The context for the tool. + dry_run (bool, default False): If True, the query will not be executed. + Instead, the query will be validated and information about the query + will be returned. Defaults to False. Returns: - dict: Dictionary representing the result of the query. - If the result contains the key "result_is_likely_truncated" with - value True, it means that there may be additional rows matching the - query not returned in the result. + dict: If `dry_run` is False, dictionary representing the result of the + query. If the result contains the key "result_is_likely_truncated" + with value True, it means that there may be additional rows matching + the query not returned in the result. + If `dry_run` is True, dictionary with "dry_run_info" field + containing query information returned by BigQuery. Examples: Fetch data or insights from a table: @@ -386,6 +467,39 @@ async def test_execute_sql_declaration_protected_write(tool_settings): ] } + Validate a query and estimate costs without executing it: + + >>> execute_sql( + ... "my_project", + ... "SELECT island FROM " + ... "bigquery-public-data.ml_datasets.penguins", + ... dry_run=True + ... ) + { + "status": "SUCCESS", + "dry_run_info": { + "configuration": { + "dryRun": True, + "jobType": "QUERY", + "query": { + "destinationTable": { + "datasetId": "_...", + "projectId": "my_project", + "tableId": "anon..." + }, + "priority": "INTERACTIVE", + "query": "SELECT island FROM bigquery-public-data.ml_datasets.penguins", + "useLegacySql": False, + "writeDisposition": "WRITE_TRUNCATE" + } + }, + "jobReference": { + "location": "US", + "projectId": "my_project" + } + } + } + Create a temporary table with schema prescribed: >>> execute_sql("my_project", @@ -798,6 +912,35 @@ def test_execute_sql_non_select_stmt_write_protected_persistent_target( } +def test_execute_sql_dry_run_true(): + """Test execute_sql tool with dry_run=True.""" + project = "my_project" + query = "SELECT 123 AS num" + credentials = mock.create_autospec(Credentials, instance=True) + tool_settings = BigQueryToolConfig(write_mode=WriteMode.ALLOWED) + tool_context = mock.create_autospec(ToolContext, instance=True) + api_repr = { + "configuration": {"dryRun": True, "query": {"query": query}}, + "jobReference": {"projectId": project, "location": "US"}, + } + + with mock.patch("google.cloud.bigquery.Client", autospec=False) as Client: + bq_client = Client.return_value + + query_job = mock.create_autospec(bigquery.QueryJob) + query_job.to_api_repr.return_value = api_repr + bq_client.query.return_value = query_job + + result = execute_sql( + project, query, credentials, tool_settings, tool_context, dry_run=True + ) + assert result == {"status": "SUCCESS", "dry_run_info": api_repr} + bq_client.query.assert_called_once() + _, mock_kwargs = bq_client.query.call_args + assert mock_kwargs["job_config"].dry_run == True + bq_client.query_and_wait.assert_not_called() + + @pytest.mark.parametrize( ("write_mode",), [