chore: update ADK Answering agent to reference doc site instead of adk-docs repo

PiperOrigin-RevId: 792259386
This commit is contained in:
Xuan Yang
2025-08-07 12:16:30 -07:00
committed by Copybara-Service
parent 20c30d5819
commit b5a8bad170
3 changed files with 325 additions and 204 deletions
@@ -12,18 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any
from adk_answering_agent.settings import BOT_RESPONSE_LABEL
from adk_answering_agent.settings import IS_INTERACTIVE
from adk_answering_agent.settings import OWNER
from adk_answering_agent.settings import REPO
from adk_answering_agent.settings import VERTEXAI_DATASTORE_ID
from adk_answering_agent.utils import error_response
from adk_answering_agent.utils import run_graphql_query
from adk_answering_agent.tools import add_comment_to_discussion
from adk_answering_agent.tools import add_label_to_discussion
from adk_answering_agent.tools import convert_gcs_links_to_https
from adk_answering_agent.tools import get_discussion_and_comments
from google.adk.agents.llm_agent import Agent
from google.adk.tools.vertex_ai_search_tool import VertexAiSearchTool
import requests
if IS_INTERACTIVE:
APPROVAL_INSTRUCTION = (
@@ -35,197 +34,6 @@ else:
" comment."
)
def get_discussion_and_comments(discussion_number: int) -> dict[str, Any]:
  """Retrieves one GitHub discussion together with its comment threads.

  Issues a single GraphQL query against the OWNER/REPO repository asking for
  the discussion's core fields, its last 20 labels, its last 100 comments and
  the last 50 replies of each of those comments.

  Args:
    discussion_number: The number of the GitHub discussion.

  Returns:
    `{"status": "success", "discussion": <discussion data>}` when the
    discussion is found. Otherwise the value produced by `error_response`,
    describing the API errors, the missing discussion, or the failed request.
  """
  print(f"Attempting to get discussion #{discussion_number} and its comments")

  graphql_query = """
query($owner: String!, $repo: String!, $discussionNumber: Int!) {
repository(owner: $owner, name: $repo) {
discussion(number: $discussionNumber) {
id
title
body
createdAt
closed
author {
login
}
# For each discussion, fetch the latest 20 labels.
labels(last: 20) {
nodes {
id
name
}
}
# For each discussion, fetch the latest 100 comments.
comments(last: 100) {
nodes {
id
body
createdAt
author {
login
}
# For each discussion, fetch the latest 50 replies
replies(last: 50) {
nodes {
id
body
createdAt
author {
login
}
}
}
}
}
}
}
}
"""
  query_variables = {
      "owner": OWNER,
      "repo": REPO,
      "discussionNumber": discussion_number,
  }

  try:
    payload = run_graphql_query(graphql_query, query_variables)
    if "errors" not in payload:
      repository = payload.get("data", {}).get("repository", {})
      discussion = repository.get("discussion")
      if discussion:
        return {"status": "success", "discussion": discussion}
      # An empty or null discussion node means the number did not resolve.
      return error_response(f"Discussion #{discussion_number} not found.")
    return error_response(str(payload["errors"]))
  except requests.exceptions.RequestException as request_error:
    # Transport-level failures are reported the same way as API errors.
    return error_response(str(request_error))
def add_comment_to_discussion(
    discussion_id: str, comment_body: str
) -> dict[str, Any]:
  """Posts a comment on a GitHub discussion on behalf of the agent.

  A bold disclaimer header identifying the text as an experimental ADK
  Answering Agent response is prepended to the comment before it is submitted
  through the `addDiscussionComment` GraphQL mutation.

  Args:
    discussion_id: The GraphQL node ID of the discussion.
    comment_body: The content of the comment in Markdown.

  Returns:
    `{"status": "success", "comment": <new comment data>}` when the API
    reports no errors. Otherwise the value produced by `error_response`.
  """
  print(f"Adding comment to discussion {discussion_id}")

  add_comment_mutation = """
mutation($discussionId: ID!, $body: String!) {
addDiscussionComment(input: {discussionId: $discussionId, body: $body}) {
comment {
id
body
createdAt
author {
login
}
}
}
}
"""
  disclaimer_header = (
      "**Response from ADK Answering Agent (experimental, answer may be"
      " inaccurate)**\n\n"
  )
  mutation_variables = {
      "discussionId": discussion_id,
      "body": disclaimer_header + comment_body,
  }

  try:
    payload = run_graphql_query(add_comment_mutation, mutation_variables)
    if "errors" not in payload:
      mutation_result = payload.get("data", {}).get("addDiscussionComment", {})
      return {"status": "success", "comment": mutation_result.get("comment")}
    return error_response(str(payload["errors"]))
  except requests.exceptions.RequestException as request_error:
    return error_response(str(request_error))
def get_label_id(label_name: str) -> str | None:
  """Finds the GraphQL node ID for a given label name.

  Queries the OWNER/REPO repository for the label. This is a best-effort
  helper: every failure mode is reported with a printed warning and a None
  result instead of an exception.

  Args:
    label_name: The name of the label to look up.

  Returns:
    The label's GraphQL node ID, or None if the request fails, the API
    reports errors, or no label information is returned.
  """
  print(f"Finding ID for label '{label_name}'...")
  query = """
query($owner: String!, $repo: String!, $labelName: String!) {
repository(owner: $owner, name: $repo) {
label(name: $labelName) {
id
}
}
}
"""
  variables = {"owner": OWNER, "repo": REPO, "labelName": label_name}

  # Only the network call can raise RequestException, so only it is guarded.
  try:
    response = run_graphql_query(query, variables)
  except requests.exceptions.RequestException as e:
    print(f"[Warning] Error from GitHub API: {e}")
    return None

  if "errors" in response:
    print(
        f"[Warning] Error from GitHub API response for label '{label_name}':"
        f" {response['errors']}"
    )
    return None

  # `data` or `repository` may be absent or explicitly null in the response.
  # Falling back to empty dicts turns those cases into "not found" instead of
  # a KeyError/AttributeError escaping from this helper.
  repository = (response.get("data") or {}).get("repository") or {}
  label_info = repository.get("label")
  if label_info:
    return label_info.get("id")

  print(f"[Warning] Label information for '{label_name}' not found.")
  return None
def add_label_to_discussion(
    discussion_id: str, label_name: str
) -> dict[str, Any]:
  """Attaches an existing repository label to a GitHub discussion.

  The label name is first resolved to its GraphQL node ID via `get_label_id`;
  that ID is then passed to the `addLabelsToLabelable` mutation.

  Args:
    discussion_id: The GraphQL node ID of the discussion.
    label_name: The name of the label to add (e.g., "bug").

  Returns:
    `{"status": "success", "label_id": ..., "label_name": ...}` when the
    mutation reports no errors. Otherwise the value produced by
    `error_response` (label not found, API errors, or request failure).
  """
  print(
      f"Attempting to add label '{label_name}' to discussion {discussion_id}..."
  )

  # The mutation needs the label's node ID, not its human-readable name.
  resolved_label_id = get_label_id(label_name)
  if not resolved_label_id:
    return error_response(f"Label '{label_name}' not found.")

  add_label_mutation = """
mutation AddLabel($discussionId: ID!, $labelId: ID!) {
addLabelsToLabelable(input: {labelableId: $discussionId, labelIds: [$labelId]}) {
clientMutationId
}
}
"""
  mutation_variables = {
      "discussionId": discussion_id,
      "labelId": resolved_label_id,
  }

  try:
    payload = run_graphql_query(add_label_mutation, mutation_variables)
    if "errors" not in payload:
      return {
          "status": "success",
          "label_id": resolved_label_id,
          "label_name": label_name,
      }
    return error_response(str(payload["errors"]))
  except requests.exceptions.RequestException as request_error:
    return error_response(str(request_error))
root_agent = Agent(
model="gemini-2.5-pro",
name="adk_answering_agent",
@@ -261,19 +69,14 @@ root_agent = Agent(
* Please include your justification for your decision in your output
to the user who is telling with you.
* If you uses citation from the document store, please provide a footnote
referencing the source document format it as: "[1] URL of the document".
* Replace the "gs://prefix/" part, e.g. "gs://adk-qa-bucket/", to be "https://github.com/google/"
* Add "blob/main/" after the repo name, e.g. "adk-python", "adk-docs", for example:
* If the original URL is "gs://adk-qa-bucket/adk-python/src/google/adk/version.py",
then the citation URL is "https://github.com/google/adk-python/blob/main/src/google/adk/version.py",
* If the original URL is "gs://adk-qa-bucket/adk-docs/docs/index.md",
then the citation URL is "https://github.com/google/adk-docs/blob/main/docs/index.md"
* If the file is a html file, replace the ".html" to be ".md"
referencing the source document format it as: "[1] publicly accessible HTTPS URL of the document".
* You can use the `convert_gcs_links_to_https` tool to convert GCS links to HTTPS links.
""",
tools=[
VertexAiSearchTool(data_store_id=VERTEXAI_DATASTORE_ID),
get_discussion_and_comments,
add_comment_to_discussion,
add_label_to_discussion,
convert_gcs_links_to_https,
],
)
@@ -0,0 +1,229 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any
from typing import Dict
from typing import Optional
from adk_answering_agent.settings import OWNER
from adk_answering_agent.settings import REPO
from adk_answering_agent.utils import convert_gcs_to_https
from adk_answering_agent.utils import error_response
from adk_answering_agent.utils import run_graphql_query
import requests
def get_discussion_and_comments(discussion_number: int) -> dict[str, Any]:
  """Retrieves one GitHub discussion together with its comment threads.

  Issues a single GraphQL query against the OWNER/REPO repository asking for
  the discussion's core fields, its last 20 labels, its last 100 comments and
  the last 50 replies of each of those comments.

  Args:
    discussion_number: The number of the GitHub discussion.

  Returns:
    `{"status": "success", "discussion": <discussion data>}` when the
    discussion is found. Otherwise the value produced by `error_response`,
    describing the API errors, the missing discussion, or the failed request.
  """
  print(f"Attempting to get discussion #{discussion_number} and its comments")

  graphql_query = """
query($owner: String!, $repo: String!, $discussionNumber: Int!) {
repository(owner: $owner, name: $repo) {
discussion(number: $discussionNumber) {
id
title
body
createdAt
closed
author {
login
}
# For each discussion, fetch the latest 20 labels.
labels(last: 20) {
nodes {
id
name
}
}
# For each discussion, fetch the latest 100 comments.
comments(last: 100) {
nodes {
id
body
createdAt
author {
login
}
# For each discussion, fetch the latest 50 replies
replies(last: 50) {
nodes {
id
body
createdAt
author {
login
}
}
}
}
}
}
}
}
"""
  query_variables = {
      "owner": OWNER,
      "repo": REPO,
      "discussionNumber": discussion_number,
  }

  try:
    payload = run_graphql_query(graphql_query, query_variables)
    if "errors" not in payload:
      repository = payload.get("data", {}).get("repository", {})
      discussion = repository.get("discussion")
      if discussion:
        return {"status": "success", "discussion": discussion}
      # An empty or null discussion node means the number did not resolve.
      return error_response(f"Discussion #{discussion_number} not found.")
    return error_response(str(payload["errors"]))
  except requests.exceptions.RequestException as request_error:
    # Transport-level failures are reported the same way as API errors.
    return error_response(str(request_error))
def add_comment_to_discussion(
    discussion_id: str, comment_body: str
) -> dict[str, Any]:
  """Posts a comment on a GitHub discussion on behalf of the agent.

  A bold disclaimer header identifying the text as an experimental ADK
  Answering Agent response is prepended to the comment before it is submitted
  through the `addDiscussionComment` GraphQL mutation.

  Args:
    discussion_id: The GraphQL node ID of the discussion.
    comment_body: The content of the comment in Markdown.

  Returns:
    `{"status": "success", "comment": <new comment data>}` when the API
    reports no errors. Otherwise the value produced by `error_response`.
  """
  print(f"Adding comment to discussion {discussion_id}")

  add_comment_mutation = """
mutation($discussionId: ID!, $body: String!) {
addDiscussionComment(input: {discussionId: $discussionId, body: $body}) {
comment {
id
body
createdAt
author {
login
}
}
}
}
"""
  disclaimer_header = (
      "**Response from ADK Answering Agent (experimental, answer may be"
      " inaccurate)**\n\n"
  )
  mutation_variables = {
      "discussionId": discussion_id,
      "body": disclaimer_header + comment_body,
  }

  try:
    payload = run_graphql_query(add_comment_mutation, mutation_variables)
    if "errors" not in payload:
      mutation_result = payload.get("data", {}).get("addDiscussionComment", {})
      return {"status": "success", "comment": mutation_result.get("comment")}
    return error_response(str(payload["errors"]))
  except requests.exceptions.RequestException as request_error:
    return error_response(str(request_error))
def get_label_id(label_name: str) -> str | None:
  """Finds the GraphQL node ID for a given label name.

  Queries the OWNER/REPO repository for the label. This is a best-effort
  helper: every failure mode is reported with a printed warning and a None
  result instead of an exception.

  Args:
    label_name: The name of the label to look up.

  Returns:
    The label's GraphQL node ID, or None if the request fails, the API
    reports errors, or no label information is returned.
  """
  print(f"Finding ID for label '{label_name}'...")
  query = """
query($owner: String!, $repo: String!, $labelName: String!) {
repository(owner: $owner, name: $repo) {
label(name: $labelName) {
id
}
}
}
"""
  variables = {"owner": OWNER, "repo": REPO, "labelName": label_name}

  # Only the network call can raise RequestException, so only it is guarded.
  try:
    response = run_graphql_query(query, variables)
  except requests.exceptions.RequestException as e:
    print(f"[Warning] Error from GitHub API: {e}")
    return None

  if "errors" in response:
    print(
        f"[Warning] Error from GitHub API response for label '{label_name}':"
        f" {response['errors']}"
    )
    return None

  # `data` or `repository` may be absent or explicitly null in the response.
  # Falling back to empty dicts turns those cases into "not found" instead of
  # a KeyError/AttributeError escaping from this helper.
  repository = (response.get("data") or {}).get("repository") or {}
  label_info = repository.get("label")
  if label_info:
    return label_info.get("id")

  print(f"[Warning] Label information for '{label_name}' not found.")
  return None
def add_label_to_discussion(
    discussion_id: str, label_name: str
) -> dict[str, Any]:
  """Attaches an existing repository label to a GitHub discussion.

  The label name is first resolved to its GraphQL node ID via `get_label_id`;
  that ID is then passed to the `addLabelsToLabelable` mutation.

  Args:
    discussion_id: The GraphQL node ID of the discussion.
    label_name: The name of the label to add (e.g., "bug").

  Returns:
    `{"status": "success", "label_id": ..., "label_name": ...}` when the
    mutation reports no errors. Otherwise the value produced by
    `error_response` (label not found, API errors, or request failure).
  """
  print(
      f"Attempting to add label '{label_name}' to discussion {discussion_id}..."
  )

  # The mutation needs the label's node ID, not its human-readable name.
  resolved_label_id = get_label_id(label_name)
  if not resolved_label_id:
    return error_response(f"Label '{label_name}' not found.")

  add_label_mutation = """
mutation AddLabel($discussionId: ID!, $labelId: ID!) {
addLabelsToLabelable(input: {labelableId: $discussionId, labelIds: [$labelId]}) {
clientMutationId
}
}
"""
  mutation_variables = {
      "discussionId": discussion_id,
      "labelId": resolved_label_id,
  }

  try:
    payload = run_graphql_query(add_label_mutation, mutation_variables)
    if "errors" not in payload:
      return {
          "status": "success",
          "label_id": resolved_label_id,
          "label_name": label_name,
      }
    return error_response(str(payload["errors"]))
  except requests.exceptions.RequestException as request_error:
    return error_response(str(request_error))
def convert_gcs_links_to_https(gcs_uris: list[str]) -> Dict[str, Optional[str]]:
  """Maps each GCS file link to a publicly accessible HTTPS link.

  Args:
    gcs_uris: A list of GCS file links, in the format
      'gs://bucket_name/prefix/relative_path'.

  Returns:
    A dictionary keyed by the original GCS file links whose values are the
    converted HTTPS links. If a GCS link is invalid, its value in the
    dictionary is None.
  """
  https_links: Dict[str, Optional[str]] = {}
  for original_link in gcs_uris:
    https_links[original_link] = convert_gcs_to_https(original_link)
  return https_links
@@ -12,8 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
from typing import Any
from typing import Optional
from urllib.parse import urljoin
from adk_answering_agent.settings import GITHUB_GRAPHQL_URL
from adk_answering_agent.settings import GITHUB_TOKEN
@@ -58,6 +61,92 @@ def parse_number_string(number_str: str | None, default_value: int = 0) -> int:
return default_value
def _check_url_exists(url: str) -> bool:
  """Reports whether a URL answers a HEAD request with a non-error status.

  Args:
    url: The URL to probe.

  Returns:
    True when the response (after following redirects) is a success or
    redirection status; False for error statuses or when the request itself
    fails (e.g., connection errors, timeouts).
  """
  try:
    # The 5-second timeout keeps an unresponsive host from blocking the
    # caller indefinitely; redirects are followed so that links which are
    # valid after redirection are still recognized.
    head_response = requests.head(url, timeout=5, allow_redirects=True)
  except requests.RequestException:
    # Any failure raised by the requests library counts as "does not exist".
    return False
  return head_response.ok
def _generate_github_url(repo_name: str, relative_path: str) -> str:
"""Generates a standard GitHub URL for a repo file."""
return f"https://github.com/google/{repo_name}/blob/main/{relative_path}"
def convert_gcs_to_https(gcs_uri: str) -> Optional[str]:
  """Converts a GCS file link into a publicly accessible HTTPS link.

  A trailing `.html` in the path is first rewritten to `.md`. Markdown files
  under the `adk-docs` prefix whose path starts with `docs/` are then mapped
  to the documentation site (https://google.github.io/adk-docs/), provided
  that the resulting page is reachable. Every other link, including doc pages
  that are not reachable, is mapped to the regular GitHub URL of the file.

  Args:
    gcs_uri: The Google Cloud Storage link, in the format
      'gs://bucket_name/prefix/relative_path'.

  Returns:
    The converted HTTPS link as a string, or None if the input is not a
    'gs://' link or does not contain a non-empty bucket, prefix, and
    relative path.
  """
  if not gcs_uri or not gcs_uri.startswith("gs://"):
    print(f"Error: Invalid GCS link format: {gcs_uri}")
    return None

  # Strip 'gs://' and split into at most (bucket, prefix, relative_path).
  parts = gcs_uri[len("gs://") :].split("/", 2)
  # All three components must be present AND non-empty. A link such as
  # 'gs://bucket/prefix/' or 'gs://bucket//file' splits into three parts but
  # would otherwise produce a malformed URL.
  if len(parts) < 3 or not all(parts):
    print(
        f"Error: Failed to parse GCS link '{gcs_uri}': GCS link must contain"
        " a bucket, prefix, and relative_path."
    )
    return None
  _, prefix, relative_path = parts

  # Replace .html with .md
  if relative_path.endswith(".html"):
    relative_path = relative_path.removesuffix(".html") + ".md"

  # Anything outside adk-docs/docs/ (e.g. adk-python) uses the GitHub URL.
  if prefix != "adk-docs" or not relative_path.startswith("docs/"):
    return _generate_github_url(prefix, relative_path)

  path_after_docs = relative_path[len("docs/") :]
  if not path_after_docs.endswith(".md"):
    # Non-markdown files are not doc-site pages; use the GitHub URL.
    return _generate_github_url(prefix, relative_path)

  base_url = "https://google.github.io/adk-docs/"
  if os.path.basename(path_after_docs) == "index.md":
    # An index file is addressed by its directory path.
    final_path_segment = os.path.dirname(path_after_docs)
  else:
    # Any other page is addressed by its file name without the extension.
    final_path_segment = path_after_docs.removesuffix(".md")

  if final_path_segment and not final_path_segment.endswith("/"):
    final_path_segment += "/"

  potential_url = urljoin(base_url, final_path_segment)

  # Only return the doc-site link if it actually exists; otherwise fall back
  # to the regular GitHub URL.
  if _check_url_exists(potential_url):
    return potential_url
  return _generate_github_url(prefix, relative_path)
async def call_agent_async(
runner: Runner, user_id: str, session_id: str, prompt: str
) -> str: