chore: update ADK Answering agent to reference doc site instead of adk-docs repo

PiperOrigin-RevId: 792259386
2026-03-30 10:57:20 -07:00 · 2025-08-07 12:16:30 -07:00
parent 20c30d5819
commit b5a8bad170
3 changed files with 325 additions and 204 deletions
@@ -12,18 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import Any
-
 from adk_answering_agent.settings import BOT_RESPONSE_LABEL
 from adk_answering_agent.settings import IS_INTERACTIVE
 from adk_answering_agent.settings import OWNER
 from adk_answering_agent.settings import REPO
 from adk_answering_agent.settings import VERTEXAI_DATASTORE_ID
-from adk_answering_agent.utils import error_response
-from adk_answering_agent.utils import run_graphql_query
+from adk_answering_agent.tools import add_comment_to_discussion
+from adk_answering_agent.tools import add_label_to_discussion
+from adk_answering_agent.tools import convert_gcs_links_to_https
+from adk_answering_agent.tools import get_discussion_and_comments
 from google.adk.agents.llm_agent import Agent
 from google.adk.tools.vertex_ai_search_tool import VertexAiSearchTool
-import requests

 if IS_INTERACTIVE:
  APPROVAL_INSTRUCTION = (
@@ -35,197 +34,6 @@ else:
      " comment."
  )

-
-def get_discussion_and_comments(discussion_number: int) -> dict[str, Any]:
-  """Fetches a discussion and its comments using the GitHub GraphQL API.
-
-  Args:
-      discussion_number: The number of the GitHub discussion.
-
-  Returns:
-      A dictionary with the request status and the discussion details.
-  """
-  print(f"Attempting to get discussion #{discussion_number} and its comments")
-  query = """
-        query($owner: String!, $repo: String!, $discussionNumber: Int!) {
-          repository(owner: $owner, name: $repo) {
-            discussion(number: $discussionNumber) {
-              id
-              title
-              body
-              createdAt
-              closed
-              author {
-                login
-              }
-              # For each discussion, fetch the latest 20 labels.
-              labels(last: 20) {
-                nodes {
-                  id
-                  name
-                }
-              }
-              # For each discussion, fetch the latest 100 comments.
-              comments(last: 100) {
-                nodes {
-                  id
-                  body
-                  createdAt
-                  author {
-                    login
-                  }
-                  # For each discussion, fetch the latest 50 replies
-                  replies(last: 50) {
-                    nodes {
-                      id
-                      body
-                      createdAt
-                      author {
-                        login
-                      }
-                    }
-                  }
-                }
-              }
-            }
-          }
-        }
-    """
-  variables = {
-      "owner": OWNER,
-      "repo": REPO,
-      "discussionNumber": discussion_number,
-  }
-  try:
-    response = run_graphql_query(query, variables)
-    if "errors" in response:
-      return error_response(str(response["errors"]))
-    discussion_data = (
-        response.get("data", {}).get("repository", {}).get("discussion")
-    )
-    if not discussion_data:
-      return error_response(f"Discussion #{discussion_number} not found.")
-    return {"status": "success", "discussion": discussion_data}
-  except requests.exceptions.RequestException as e:
-    return error_response(str(e))
-
-
-def add_comment_to_discussion(
-    discussion_id: str, comment_body: str
-) -> dict[str, Any]:
-  """Adds a comment to a specific discussion.
-
-  Args:
-      discussion_id: The GraphQL node ID of the discussion.
-      comment_body: The content of the comment in Markdown.
-
-  Returns:
-      The status of the request and the new comment's details.
-  """
-  print(f"Adding comment to discussion {discussion_id}")
-  query = """
-        mutation($discussionId: ID!, $body: String!) {
-          addDiscussionComment(input: {discussionId: $discussionId, body: $body}) {
-            comment {
-              id
-              body
-              createdAt
-              author {
-                login
-              }
-            }
-          }
-        }
-    """
-  comment_body = (
-      "**Response from ADK Answering Agent (experimental, answer may be"
-      " inaccurate)**\n\n"
-      + comment_body
-  )
-
-  variables = {"discussionId": discussion_id, "body": comment_body}
-  try:
-    response = run_graphql_query(query, variables)
-    if "errors" in response:
-      return error_response(str(response["errors"]))
-    new_comment = (
-        response.get("data", {}).get("addDiscussionComment", {}).get("comment")
-    )
-    return {"status": "success", "comment": new_comment}
-  except requests.exceptions.RequestException as e:
-    return error_response(str(e))
-
-
-def get_label_id(label_name: str) -> str | None:
-  """Helper function to find the GraphQL node ID for a given label name."""
-  print(f"Finding ID for label '{label_name}'...")
-  query = """
-    query($owner: String!, $repo: String!, $labelName: String!) {
-      repository(owner: $owner, name: $repo) {
-        label(name: $labelName) {
-          id
-        }
-      }
-    }
-    """
-  variables = {"owner": OWNER, "repo": REPO, "labelName": label_name}
-
-  try:
-    response = run_graphql_query(query, variables)
-    if "errors" in response:
-      print(
-          f"[Warning] Error from GitHub API response for label '{label_name}':"
-          f" {response['errors']}"
-      )
-      return None
-    label_info = response["data"].get("repository", {}).get("label")
-    if label_info:
-      return label_info.get("id")
-    print(f"[Warning] Label information for '{label_name}' not found.")
-    return None
-  except requests.exceptions.RequestException as e:
-    print(f"[Warning] Error from GitHub API: {e}")
-    return None
-
-
-def add_label_to_discussion(
-    discussion_id: str, label_name: str
-) -> dict[str, Any]:
-  """Adds a label to a specific discussion.
-
-  Args:
-      discussion_id: The GraphQL node ID of the discussion.
-      label_name: The name of the label to add (e.g., "bug").
-
-  Returns:
-      The status of the request and the label details.
-  """
-  print(
-      f"Attempting to add label '{label_name}' to discussion {discussion_id}..."
-  )
-  # First, get the GraphQL ID of the label by its name
-  label_id = get_label_id(label_name)
-  if not label_id:
-    return error_response(f"Label '{label_name}' not found.")
-
-  # Then, perform the mutation to add the label to the discussion
-  mutation = """
-    mutation AddLabel($discussionId: ID!, $labelId: ID!) {
-      addLabelsToLabelable(input: {labelableId: $discussionId, labelIds: [$labelId]}) {
-        clientMutationId
-      }
-    }
-    """
-  variables = {"discussionId": discussion_id, "labelId": label_id}
-  try:
-    response = run_graphql_query(mutation, variables)
-    if "errors" in response:
-      return error_response(str(response["errors"]))
-    return {"status": "success", "label_id": label_id, "label_name": label_name}
-  except requests.exceptions.RequestException as e:
-    return error_response(str(e))
-
-
 root_agent = Agent(
    model="gemini-2.5-pro",
    name="adk_answering_agent",
@@ -261,19 +69,14 @@ root_agent = Agent(
      * Please include your justification for your decision in your output
        to the user who is telling with you.
      * If you uses citation from the document store, please provide a footnote
-        referencing the source document format it as: "[1] URL of the document".
-        * Replace the "gs://prefix/" part, e.g. "gs://adk-qa-bucket/", to be "https://github.com/google/"
-        * Add "blob/main/" after the repo name, e.g. "adk-python", "adk-docs", for example:
-          * If the original URL is "gs://adk-qa-bucket/adk-python/src/google/adk/version.py",
-            then the citation URL is "https://github.com/google/adk-python/blob/main/src/google/adk/version.py",
-          * If the original URL is "gs://adk-qa-bucket/adk-docs/docs/index.md",
-            then the citation URL is "https://github.com/google/adk-docs/blob/main/docs/index.md"
-        * If the file is a html file, replace the ".html" to be ".md"
+        referencing the source document format it as: "[1] publicly accessible HTTPS URL of the document".
+        * You can use the `convert_gcs_links_to_https` tool to convert GCS links to HTTPS links.
    """,
    tools=[
        VertexAiSearchTool(data_store_id=VERTEXAI_DATASTORE_ID),
        get_discussion_and_comments,
        add_comment_to_discussion,
        add_label_to_discussion,
+        convert_gcs_links_to_https,
    ],
 )
@@ -0,0 +1,229 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any
+from typing import Dict
+from typing import Optional
+
+from adk_answering_agent.settings import OWNER
+from adk_answering_agent.settings import REPO
+from adk_answering_agent.utils import convert_gcs_to_https
+from adk_answering_agent.utils import error_response
+from adk_answering_agent.utils import run_graphql_query
+import requests
+
+
+def get_discussion_and_comments(discussion_number: int) -> dict[str, Any]:
+  """Retrieves a GitHub discussion with its labels, comments and replies.
+
+  Args:
+      discussion_number: The number of the GitHub discussion to retrieve.
+
+  Returns:
+      A dictionary with the request status and, on success, the discussion.
+  """
+  print(f"Attempting to get discussion #{discussion_number} and its comments")
+  query = """
+        query($owner: String!, $repo: String!, $discussionNumber: Int!) {
+          repository(owner: $owner, name: $repo) {
+            discussion(number: $discussionNumber) {
+              id
+              title
+              body
+              createdAt
+              closed
+              author {
+                login
+              }
+              # For each discussion, fetch the latest 20 labels.
+              labels(last: 20) {
+                nodes {
+                  id
+                  name
+                }
+              }
+              # For each discussion, fetch the latest 100 comments.
+              comments(last: 100) {
+                nodes {
+                  id
+                  body
+                  createdAt
+                  author {
+                    login
+                  }
+                  # For each discussion, fetch the latest 50 replies
+                  replies(last: 50) {
+                    nodes {
+                      id
+                      body
+                      createdAt
+                      author {
+                        login
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+    """
+  variables = {
+      "owner": OWNER,
+      "repo": REPO,
+      "discussionNumber": discussion_number,
+  }
+  try:
+    result = run_graphql_query(query, variables)
+    # Errors reported by the API take precedence over any returned data.
+    if "errors" in result:
+      return error_response(str(result["errors"]))
+    repository = result.get("data", {}).get("repository", {})
+    discussion = repository.get("discussion")
+    if discussion:
+      return {"status": "success", "discussion": discussion}
+    return error_response(f"Discussion #{discussion_number} not found.")
+  except requests.exceptions.RequestException as err:
+    return error_response(str(err))
+
+
+def add_comment_to_discussion(
+    discussion_id: str, comment_body: str
+) -> dict[str, Any]:
+  """Posts a new comment on a specific discussion.
+
+  Args:
+      discussion_id: The GraphQL node ID of the discussion.
+      comment_body: The content of the comment in Markdown.
+
+  Returns:
+      The status of the request and the details of the comment created.
+  """
+  print(f"Adding comment to discussion {discussion_id}")
+  query = """
+        mutation($discussionId: ID!, $body: String!) {
+          addDiscussionComment(input: {discussionId: $discussionId, body: $body}) {
+            comment {
+              id
+              body
+              createdAt
+              author {
+                login
+              }
+            }
+          }
+        }
+    """
+  # Every posted comment opens with a disclaimer identifying the agent.
+  banner = (
+      "**Response from ADK Answering Agent (experimental, answer may be"
+      " inaccurate)**\n\n"
+  )
+
+  variables = {"discussionId": discussion_id, "body": banner + comment_body}
+  try:
+    result = run_graphql_query(query, variables)
+    if "errors" in result:
+      return error_response(str(result["errors"]))
+    # Pull the created comment's details out of the mutation payload.
+    payload = result.get("data", {}).get("addDiscussionComment", {})
+    posted_comment = payload.get("comment")
+    return {"status": "success", "comment": posted_comment}
+  except requests.exceptions.RequestException as err:
+    return error_response(str(err))
+
+
+def get_label_id(label_name: str) -> str | None:
+  """Finds the GraphQL node ID of a repo label; returns None if unavailable."""
+  print(f"Finding ID for label '{label_name}'...")
+  query = """
+    query($owner: String!, $repo: String!, $labelName: String!) {
+      repository(owner: $owner, name: $repo) {
+        label(name: $labelName) {
+          id
+        }
+      }
+    }
+    """
+  variables = {"owner": OWNER, "repo": REPO, "labelName": label_name}
+  try:
+    response = run_graphql_query(query, variables)
+    if "errors" in response:
+      print(
+          f"[Warning] Error from GitHub API response for label '{label_name}':"
+          f" {response['errors']}"
+      )
+      return None
+    # A response without a "data" entry is reported as label-not-found.
+    label_info = response.get("data", {}).get("repository", {}).get("label")
+    if label_info:
+      return label_info.get("id")
+    print(f"[Warning] Label information for '{label_name}' not found.")
+    return None
+  except requests.exceptions.RequestException as e:
+    print(f"[Warning] Error from GitHub API: {e}")
+    return None
+
+
+def add_label_to_discussion(
+    discussion_id: str, label_name: str
+) -> dict[str, Any]:
+  """Attaches an existing repository label to a specific discussion.
+
+  Args:
+      discussion_id: The GraphQL node ID of the discussion.
+      label_name: The name of the label to add (e.g., "bug").
+
+  Returns:
+      The status of the request and, on success, the label's ID and name.
+  """
+  print(
+      f"Attempting to add label '{label_name}' to discussion {discussion_id}..."
+  )
+  # The mutation below needs the label's GraphQL ID, so resolve the name first.
+  node_id = get_label_id(label_name)
+  if not node_id:
+    return error_response(f"Label '{label_name}' not found.")
+
+  # With the ID in hand, attach the label to the discussion.
+  add_label_mutation = """
+    mutation AddLabel($discussionId: ID!, $labelId: ID!) {
+      addLabelsToLabelable(input: {labelableId: $discussionId, labelIds: [$labelId]}) {
+        clientMutationId
+      }
+    }
+    """
+  variables = {"discussionId": discussion_id, "labelId": node_id}
+  try:
+    result = run_graphql_query(add_label_mutation, variables)
+    if "errors" in result:
+      return error_response(str(result["errors"]))
+    return {"status": "success", "label_id": node_id, "label_name": label_name}
+  except requests.exceptions.RequestException as err:
+    return error_response(str(err))
+
+
+def convert_gcs_links_to_https(gcs_uris: list[str]) -> Dict[str, Optional[str]]:
+  """Converts GCS file links into publicly accessible HTTPS links.
+
+  Args:
+      gcs_uris: A list of GCS file links, each in the format
+        'gs://bucket_name/prefix/relative_path'.
+
+  Returns:
+      A dictionary keyed by the original GCS file links, whose values are the
+      converted HTTPS links, or None for a GCS link that is invalid.
+  """
+  https_links = map(convert_gcs_to_https, gcs_uris)
+  return dict(zip(gcs_uris, https_links))
@@ -12,8 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import os
 import sys
 from typing import Any
+from typing import Optional
+from urllib.parse import urljoin

 from adk_answering_agent.settings import GITHUB_GRAPHQL_URL
 from adk_answering_agent.settings import GITHUB_TOKEN
@@ -58,6 +61,92 @@ def parse_number_string(number_str: str | None, default_value: int = 0) -> int:
    return default_value


+def _check_url_exists(url: str) -> bool:
+  """Reports whether a HEAD request to `url` gets a successful response."""
+  try:
+    # The timeout keeps an unresponsive host from blocking the caller
+    # forever; redirects are followed so that a link which is only valid
+    # after redirection is still judged by its final response.
+    head_response = requests.head(url, timeout=5, allow_redirects=True)
+  except requests.RequestException:
+    # Any failure raised by the requests library (e.g., connection errors,
+    # timeouts) counts as "does not exist".
+    return False
+  # Status codes 2xx (Success) or 3xx (Redirection) are considered valid.
+  return head_response.ok
+
+
+def _generate_github_url(repo_name: str, relative_path: str) -> str:
+  """Builds the github.com/google 'blob/main' URL of a file in `repo_name`."""
+  return f"https://github.com/google/{repo_name}/blob/main/{relative_path}"
+
+
+def convert_gcs_to_https(gcs_uri: str) -> Optional[str]:
+  """Converts a GCS file link into a publicly accessible HTTPS link.
+
+  Markdown files under the 'docs/' folder of the 'adk-docs' prefix map to
+  their page on https://google.github.io/adk-docs/ when that page responds
+  to a HEAD request; every other file maps to its GitHub 'blob/main' URL.
+  A trailing '.html' in the path is replaced with '.md' before mapping.
+
+  Args:
+      gcs_uri: The Google Cloud Storage link, in the format
+        'gs://bucket_name/prefix/relative_path'.
+
+  Returns:
+      The converted HTTPS link as a string, or None if the input format is
+      incorrect.
+  """
+  # Parse the GCS link
+  if not gcs_uri or not gcs_uri.startswith("gs://"):
+    print(f"Error: Invalid GCS link format: {gcs_uri}")
+    return None
+
+  # Strip 'gs://' and split by '/' into bucket, prefix and path. All three
+  # are required and none may be empty, e.g. 'gs://bucket/prefix/' is invalid.
+  parts = gcs_uri[5:].split("/", 2)
+  if len(parts) < 3 or not all(parts):
+    print(
+        f"Error: Failed to parse GCS link '{gcs_uri}': GCS link must contain"
+        " a bucket, prefix, and relative_path."
+    )
+    return None
+  _, prefix, relative_path = parts
+
+  # Replace .html with .md
+  if relative_path.endswith(".html"):
+    relative_path = relative_path.removesuffix(".html") + ".md"
+
+  # Convert the links for adk-docs
+  if prefix == "adk-docs" and relative_path.startswith("docs/"):
+    path_after_docs = relative_path[len("docs/") :]
+    if not path_after_docs.endswith(".md"):
+      # Use the regular github url
+      return _generate_github_url(prefix, relative_path)
+
+    base_url = "https://google.github.io/adk-docs/"
+    if os.path.basename(path_after_docs) == "index.md":
+      # Use the directory path if it is an index file
+      final_path_segment = os.path.dirname(path_after_docs)
+    else:
+      # Otherwise, use the file name without extension
+      final_path_segment = path_after_docs.removesuffix(".md")
+
+    if final_path_segment and not final_path_segment.endswith("/"):
+      final_path_segment += "/"
+
+    potential_url = urljoin(base_url, final_path_segment)
+
+    # urljoin lets a segment starting with '/', '//' or a scheme replace the
+    # base path or host, so only probe and return links under base_url.
+    if potential_url.startswith(base_url) and _check_url_exists(potential_url):
+      return potential_url
+
+  # Everything else, e.g. adk-python files or a doc page that does not exist,
+  # falls back to the regular github url.
+  return _generate_github_url(prefix, relative_path)
+
+
 async def call_agent_async(
    runner: Runner, user_id: str, session_id: str, prompt: str
 ) -> str: