docs: Add path sanitization for model-generated file paths

Co-authored-by: George Weale <gweale@google.com>
PiperOrigin-RevId: 834344352
This commit is contained in:
George Weale
2025-11-19 10:12:45 -08:00
committed by Copybara-Service
parent 857de04deb
commit 0ac35b23dc
3 changed files with 69 additions and 4 deletions
@@ -31,6 +31,7 @@ import jsonschema
import yaml
from ..utils import load_agent_config_schema
from ..utils.path_normalizer import sanitize_generated_file_path
from ..utils.resolve_root_directory import resolve_file_path
from .write_files import write_files
@@ -177,8 +178,9 @@ async def write_config_files(
# Step 1: Validate all configs before writing any files
for file_path, config_content in configs.items():
normalized_input_path = sanitize_generated_file_path(file_path)
file_result = _validate_single_config(
file_path, config_content, project_folder_name
normalized_input_path, config_content, project_folder_name
)
result["files"][file_path] = file_result
@@ -197,7 +199,7 @@ async def write_config_files(
rename_applied,
sanitized_name,
rename_warning,
) = _determine_target_file_path(file_path, agent_name)
) = _determine_target_file_path(normalized_input_path, agent_name)
file_result["target_file_path"] = target_path
file_result["rename_applied"] = rename_applied
@@ -0,0 +1,60 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helpers for normalizing file path strings produced by the model."""
from __future__ import annotations
import re
# Matches a single path separator (forward slash or backslash). The capture
# group makes re.split() return the separators as their own items, so the path
# can be re-joined with exactly the separators it was written with.
_SEGMENT_SPLIT_PATTERN = re.compile(r"([/\\])")

# Characters trimmed from both ends of the whole path and of every segment:
# space, tab, CR, LF, and the single, double and backtick quote characters.
_BOUNDARY_CHARS = " \t\r\n'\"`"


def sanitize_generated_file_path(file_path: str) -> str:
    """Remove stray quotes and whitespace wrapped around a path and its parts.

    Model output sometimes arrives quoted, e.g. `'tools/web.yaml'`; used
    verbatim, that would produce directories whose names contain the quote
    characters. Each segment between separators is trimmed of leading and
    trailing whitespace and quote-like characters. Separators themselves and
    any characters in the interior of a segment are kept unchanged.

    Args:
      file_path: Path string provided by the model or user. Values that are
        not `str` are converted with `str()` first.

    Returns:
      The cleaned path string. If the input is empty or whitespace-only, the
      whitespace-stripped (empty) string is returned. If trimming would leave
      nothing at all, the whitespace-stripped input is returned instead.
    """
    raw = file_path if isinstance(file_path, str) else str(file_path)
    stripped = raw.strip()
    if not stripped:
        return stripped

    # Separators pass through untouched; every other piece (including empty
    # ones, for which strip() is a no-op) loses its boundary characters.
    cleaned = "".join(
        piece if piece in ("/", "\\") else piece.strip(_BOUNDARY_CHARS)
        for piece in _SEGMENT_SPLIT_PATTERN.split(stripped)
    ).strip(_BOUNDARY_CHARS)

    # Never hand back an empty path for non-empty input: fall back to the
    # whitespace-stripped original when everything was trimmed away.
    return cleaned if cleaned else stripped
@@ -21,6 +21,8 @@ from typing import Dict
from typing import List
from typing import Optional
from .path_normalizer import sanitize_generated_file_path
def resolve_file_path(
file_path: str,
@@ -40,7 +42,8 @@ def resolve_file_path(
Returns:
Resolved absolute Path object
"""
file_path_obj = Path(file_path)
normalized_path = sanitize_generated_file_path(file_path)
file_path_obj = Path(normalized_path)
# If already absolute, use as-is
if file_path_obj.is_absolute():
@@ -63,7 +66,7 @@ def resolve_file_path(
resolved_root = Path(os.getcwd()) / root_directory
# Resolve file path relative to root directory
return resolved_root / file_path
return resolved_root / file_path_obj
def resolve_file_paths(