feat: Add dry_run functionality to BigQuery execute_sql tool

PiperOrigin-RevId: 814854520
This commit is contained in:
Google Team Member
2025-10-03 15:36:25 -07:00
committed by Copybara-Service
parent 0b84d3eea7
commit 960eda3d1f
3 changed files with 295 additions and 25 deletions
+1 -1
View File
@@ -23,7 +23,7 @@ distributed via the `google.adk.tools.bigquery` module. These tools include:
1. `execute_sql`
Runs a SQL query in BigQuery.
Runs or dry-runs a SQL query in BigQuery.
1. `ask_data_insights`
+139 -12
View File
@@ -37,6 +37,7 @@ def execute_sql(
credentials: Credentials,
settings: BigQueryToolConfig,
tool_context: ToolContext,
dry_run: bool = False,
) -> dict:
"""Run a BigQuery or BigQuery ML SQL query in the project and return the result.
@@ -47,12 +48,17 @@ def execute_sql(
credentials (Credentials): The credentials to use for the request.
settings (BigQueryToolConfig): The settings for the tool.
tool_context (ToolContext): The context for the tool.
dry_run (bool, default False): If True, the query will not be executed.
Instead, the query will be validated and information about the query
will be returned. Defaults to False.
Returns:
dict: Dictionary representing the result of the query.
If the result contains the key "result_is_likely_truncated" with
value True, it means that there may be additional rows matching the
query not returned in the result.
dict: If `dry_run` is False, dictionary representing the result of the
query. If the result contains the key "result_is_likely_truncated"
with value True, it means that there may be additional rows matching
the query not returned in the result.
If `dry_run` is True, dictionary with "dry_run_info" field
containing query information returned by BigQuery.
Examples:
Fetch data or insights from a table:
@@ -77,6 +83,39 @@ def execute_sql(
}
]
}
Validate a query and estimate costs without executing it:
>>> execute_sql(
... "my_project",
... "SELECT island FROM "
... "bigquery-public-data.ml_datasets.penguins",
... dry_run=True
... )
{
"status": "SUCCESS",
"dry_run_info": {
"configuration": {
"dryRun": True,
"jobType": "QUERY",
"query": {
"destinationTable": {
"datasetId": "_...",
"projectId": "my_project",
"tableId": "anon..."
},
"priority": "INTERACTIVE",
"query": "SELECT island FROM bigquery-public-data.ml_datasets.penguins",
"useLegacySql": False,
"writeDisposition": "WRITE_TRUNCATE"
}
},
"jobReference": {
"location": "US",
"projectId": "my_project"
}
}
}
"""
try:
# Validate compute project if applicable
@@ -167,6 +206,18 @@ def execute_sql(
}
# Finally execute the query and fetch the result
if dry_run:
job_config_kwargs = {"dry_run": True}
if bq_connection_properties:
job_config_kwargs["connection_properties"] = bq_connection_properties
job_config = bigquery.QueryJobConfig(**job_config_kwargs)
dry_run_job = bq_client.query(
query,
project=project_id,
job_config=job_config,
)
return {"status": "SUCCESS", "dry_run_info": dry_run_job.to_api_repr()}
job_config = (
bigquery.QueryJobConfig(connection_properties=bq_connection_properties)
if bq_connection_properties
@@ -214,12 +265,17 @@ def _execute_sql_write_mode(*args, **kwargs) -> dict:
credentials (Credentials): The credentials to use for the request.
settings (BigQueryToolConfig): The settings for the tool.
tool_context (ToolContext): The context for the tool.
dry_run (bool, default False): If True, the query will not be executed.
Instead, the query will be validated and information about the query
will be returned. Defaults to False.
Returns:
dict: Dictionary representing the result of the query.
If the result contains the key "result_is_likely_truncated" with
value True, it means that there may be additional rows matching the
query not returned in the result.
dict: If `dry_run` is False, dictionary representing the result of the
query. If the result contains the key "result_is_likely_truncated"
with value True, it means that there may be additional rows matching
the query not returned in the result.
If `dry_run` is True, dictionary with "dry_run_info" field
containing query information returned by BigQuery.
Examples:
Fetch data or insights from a table:
@@ -245,6 +301,39 @@ def _execute_sql_write_mode(*args, **kwargs) -> dict:
]
}
Validate a query and estimate costs without executing it:
>>> execute_sql(
... "my_project",
... "SELECT island FROM "
... "bigquery-public-data.ml_datasets.penguins",
... dry_run=True
... )
{
"status": "SUCCESS",
"dry_run_info": {
"configuration": {
"dryRun": True,
"jobType": "QUERY",
"query": {
"destinationTable": {
"datasetId": "_...",
"projectId": "my_project",
"tableId": "anon..."
},
"priority": "INTERACTIVE",
"query": "SELECT island FROM bigquery-public-data.ml_datasets.penguins",
"useLegacySql": False,
"writeDisposition": "WRITE_TRUNCATE"
}
},
"jobReference": {
"location": "US",
"projectId": "my_project"
}
}
}
Create a table with schema prescribed:
>>> execute_sql("my_project",
@@ -396,12 +485,17 @@ def _execute_sql_protected_write_mode(*args, **kwargs) -> dict:
credentials (Credentials): The credentials to use for the request.
settings (BigQueryToolConfig): The settings for the tool.
tool_context (ToolContext): The context for the tool.
dry_run (bool, default False): If True, the query will not be executed.
Instead, the query will be validated and information about the query
will be returned. Defaults to False.
Returns:
dict: Dictionary representing the result of the query.
If the result contains the key "result_is_likely_truncated" with
value True, it means that there may be additional rows matching the
query not returned in the result.
dict: If `dry_run` is False, dictionary representing the result of the
query. If the result contains the key "result_is_likely_truncated"
with value True, it means that there may be additional rows matching
the query not returned in the result.
If `dry_run` is True, dictionary with "dry_run_info" field
containing query information returned by BigQuery.
Examples:
Fetch data or insights from a table:
@@ -427,6 +521,39 @@ def _execute_sql_protected_write_mode(*args, **kwargs) -> dict:
]
}
Validate a query and estimate costs without executing it:
>>> execute_sql(
... "my_project",
... "SELECT island FROM "
... "bigquery-public-data.ml_datasets.penguins",
... dry_run=True
... )
{
"status": "SUCCESS",
"dry_run_info": {
"configuration": {
"dryRun": True,
"jobType": "QUERY",
"query": {
"destinationTable": {
"datasetId": "_...",
"projectId": "my_project",
"tableId": "anon..."
},
"priority": "INTERACTIVE",
"query": "SELECT island FROM bigquery-public-data.ml_datasets.penguins",
"useLegacySql": False,
"writeDisposition": "WRITE_TRUNCATE"
}
},
"jobReference": {
"location": "US",
"projectId": "my_project"
}
}
}
Create a temporary table with schema prescribed:
>>> execute_sql("my_project",
@@ -95,12 +95,17 @@ async def test_execute_sql_declaration_read_only(tool_settings):
credentials (Credentials): The credentials to use for the request.
settings (BigQueryToolConfig): The settings for the tool.
tool_context (ToolContext): The context for the tool.
dry_run (bool, default False): If True, the query will not be executed.
Instead, the query will be validated and information about the query
will be returned. Defaults to False.
Returns:
dict: Dictionary representing the result of the query.
If the result contains the key "result_is_likely_truncated" with
value True, it means that there may be additional rows matching the
query not returned in the result.
dict: If `dry_run` is False, dictionary representing the result of the
query. If the result contains the key "result_is_likely_truncated"
with value True, it means that there may be additional rows matching
the query not returned in the result.
If `dry_run` is True, dictionary with "dry_run_info" field
containing query information returned by BigQuery.
Examples:
Fetch data or insights from a table:
@@ -124,6 +129,39 @@ async def test_execute_sql_declaration_read_only(tool_settings):
"population": 52
}
]
}
Validate a query and estimate costs without executing it:
>>> execute_sql(
... "my_project",
... "SELECT island FROM "
... "bigquery-public-data.ml_datasets.penguins",
... dry_run=True
... )
{
"status": "SUCCESS",
"dry_run_info": {
"configuration": {
"dryRun": True,
"jobType": "QUERY",
"query": {
"destinationTable": {
"datasetId": "_...",
"projectId": "my_project",
"tableId": "anon..."
},
"priority": "INTERACTIVE",
"query": "SELECT island FROM bigquery-public-data.ml_datasets.penguins",
"useLegacySql": False,
"writeDisposition": "WRITE_TRUNCATE"
}
},
"jobReference": {
"location": "US",
"projectId": "my_project"
}
}
}""")
@@ -156,12 +194,17 @@ async def test_execute_sql_declaration_write(tool_settings):
credentials (Credentials): The credentials to use for the request.
settings (BigQueryToolConfig): The settings for the tool.
tool_context (ToolContext): The context for the tool.
dry_run (bool, default False): If True, the query will not be executed.
Instead, the query will be validated and information about the query
will be returned. Defaults to False.
Returns:
dict: Dictionary representing the result of the query.
If the result contains the key "result_is_likely_truncated" with
value True, it means that there may be additional rows matching the
query not returned in the result.
dict: If `dry_run` is False, dictionary representing the result of the
query. If the result contains the key "result_is_likely_truncated"
with value True, it means that there may be additional rows matching
the query not returned in the result.
If `dry_run` is True, dictionary with "dry_run_info" field
containing query information returned by BigQuery.
Examples:
Fetch data or insights from a table:
@@ -187,6 +230,39 @@ async def test_execute_sql_declaration_write(tool_settings):
]
}
Validate a query and estimate costs without executing it:
>>> execute_sql(
... "my_project",
... "SELECT island FROM "
... "bigquery-public-data.ml_datasets.penguins",
... dry_run=True
... )
{
"status": "SUCCESS",
"dry_run_info": {
"configuration": {
"dryRun": True,
"jobType": "QUERY",
"query": {
"destinationTable": {
"datasetId": "_...",
"projectId": "my_project",
"tableId": "anon..."
},
"priority": "INTERACTIVE",
"query": "SELECT island FROM bigquery-public-data.ml_datasets.penguins",
"useLegacySql": False,
"writeDisposition": "WRITE_TRUNCATE"
}
},
"jobReference": {
"location": "US",
"projectId": "my_project"
}
}
}
Create a table with schema prescribed:
>>> execute_sql("my_project",
@@ -355,12 +431,17 @@ async def test_execute_sql_declaration_protected_write(tool_settings):
credentials (Credentials): The credentials to use for the request.
settings (BigQueryToolConfig): The settings for the tool.
tool_context (ToolContext): The context for the tool.
dry_run (bool, default False): If True, the query will not be executed.
Instead, the query will be validated and information about the query
will be returned. Defaults to False.
Returns:
dict: Dictionary representing the result of the query.
If the result contains the key "result_is_likely_truncated" with
value True, it means that there may be additional rows matching the
query not returned in the result.
dict: If `dry_run` is False, dictionary representing the result of the
query. If the result contains the key "result_is_likely_truncated"
with value True, it means that there may be additional rows matching
the query not returned in the result.
If `dry_run` is True, dictionary with "dry_run_info" field
containing query information returned by BigQuery.
Examples:
Fetch data or insights from a table:
@@ -386,6 +467,39 @@ async def test_execute_sql_declaration_protected_write(tool_settings):
]
}
Validate a query and estimate costs without executing it:
>>> execute_sql(
... "my_project",
... "SELECT island FROM "
... "bigquery-public-data.ml_datasets.penguins",
... dry_run=True
... )
{
"status": "SUCCESS",
"dry_run_info": {
"configuration": {
"dryRun": True,
"jobType": "QUERY",
"query": {
"destinationTable": {
"datasetId": "_...",
"projectId": "my_project",
"tableId": "anon..."
},
"priority": "INTERACTIVE",
"query": "SELECT island FROM bigquery-public-data.ml_datasets.penguins",
"useLegacySql": False,
"writeDisposition": "WRITE_TRUNCATE"
}
},
"jobReference": {
"location": "US",
"projectId": "my_project"
}
}
}
Create a temporary table with schema prescribed:
>>> execute_sql("my_project",
@@ -798,6 +912,35 @@ def test_execute_sql_non_select_stmt_write_protected_persistent_target(
}
def test_execute_sql_dry_run_true():
  """Verify execute_sql with dry_run=True validates the query without running it.

  The BigQuery client is mocked so that `Client.query` returns a job whose
  `to_api_repr()` yields a canned payload. The tool must:
    * submit the job with a job config whose `dry_run` flag is set,
    * surface the job's API representation under the "dry_run_info" key,
    * never call `query_and_wait` (i.e. never actually execute the query).
  """
  test_project = "my_project"
  test_query = "SELECT 123 AS num"
  mock_credentials = mock.create_autospec(Credentials, instance=True)
  settings = BigQueryToolConfig(write_mode=WriteMode.ALLOWED)
  context = mock.create_autospec(ToolContext, instance=True)
  expected_job_repr = {
      "configuration": {"dryRun": True, "query": {"query": test_query}},
      "jobReference": {"projectId": test_project, "location": "US"},
  }
  with mock.patch("google.cloud.bigquery.Client", autospec=False) as client_cls:
    mock_client = client_cls.return_value
    mock_job = mock.create_autospec(bigquery.QueryJob)
    mock_job.to_api_repr.return_value = expected_job_repr
    mock_client.query.return_value = mock_job
    result = execute_sql(
        test_project,
        test_query,
        mock_credentials,
        settings,
        context,
        dry_run=True,
    )
  # The dry-run payload is passed through verbatim under "dry_run_info".
  assert result == {"status": "SUCCESS", "dry_run_info": expected_job_repr}
  # Exactly one job submission, flagged as a dry run; no real execution.
  mock_client.query.assert_called_once()
  _, call_kwargs = mock_client.query.call_args
  assert call_kwargs["job_config"].dry_run is True
  mock_client.query_and_wait.assert_not_called()
@pytest.mark.parametrize(
("write_mode",),
[