fix: Add schema type sanitization to OpenAPI spec parser

This change introduces a `_sanitize_schema_types` method to the OpenAPI spec parser. The method recursively removes or filters out non-standard schema types (e.g., "Any", "Unknown") from a copy of the OpenAPI specification, and lower-cases the valid ones, so that the spec passes Pydantic 2.11+ schema validation.

Close #3704
Close #3108

Co-authored-by: George Weale <gweale@google.com>
PiperOrigin-RevId: 852986491
This commit is contained in:
George Weale
2026-01-06 16:46:06 -08:00
committed by Copybara-Service
parent ce64787c3e
commit 6dce7f8a8f
2 changed files with 271 additions and 0 deletions
@@ -19,6 +19,7 @@ from typing import Any
from typing import Dict
from typing import FrozenSet
from typing import List
from typing import Optional
from typing import Set
from fastapi.openapi.models import Operation
from pydantic import BaseModel
@@ -29,6 +30,21 @@ from ..._gemini_schema_util import _to_snake_case
from ..common.common import ApiParameter
from .operation_parser import OperationParser
# Valid JSON Schema type names for the ``type`` keyword of a schema object.
#
# These are the only values accepted by Pydantic 2.11+ for Schema.type; the
# sanitizer compares lower-cased type strings against this set.
# Annotated as FrozenSet (not Set) because the value is an immutable frozenset.
_VALID_SCHEMA_TYPES: FrozenSet[str] = frozenset({
    "array",
    "boolean",
    "integer",
    "null",
    "number",
    "object",
    "string",
})

# Dictionary keys whose value is treated as schema content: everything nested
# below one of these keys has its ``type`` field sanitized.
_SCHEMA_CONTAINER_KEYS: FrozenSet[str] = frozenset({"schema", "schemas"})
class OperationEndpoint(BaseModel):
base_url: str
@@ -70,9 +86,81 @@ class OpenApiSpecParser:
"""
openapi_spec_dict = self._resolve_references(openapi_spec_dict)
openapi_spec_dict = self._sanitize_schema_types(openapi_spec_dict)
operations = self._collect_operations(openapi_spec_dict)
return operations
def _sanitize_schema_types(
    self, openapi_spec: Dict[str, Any]
) -> Dict[str, Any]:
    """Returns a copy of the spec whose schema ``type`` fields are valid.

    Pydantic 2.11+ strictly validates that schema types are one of:
    'array', 'boolean', 'integer', 'null', 'number', 'object', 'string'.
    External APIs (like Google Integration Connectors) may return schemas
    with non-standard types like 'Any'.

    Within every dict nested below a ``schema`` / ``schemas`` key:

    * a string ``type`` is lower-cased when valid, otherwise removed;
    * a list ``type`` keeps only its valid string entries (lower-cased,
      de-duplicated, original order); the key is removed if none remain;
    * any other ``type`` value (e.g. a dict) is left untouched.

    Instance data is never rewritten: the values of ``default``,
    ``example``, ``examples``, ``enum`` and ``const`` inside a schema object
    are arbitrary JSON, so a ``"type"`` key found there is payload, not a
    schema keyword. Inside name-to-schema maps (``properties``,
    ``patternProperties``, ``$defs``, ``definitions``, ``dependentSchemas``
    and ``schemas``) those same words are ordinary names and their values
    are still sanitized as schemas.

    Args:
      openapi_spec: A dictionary representing the OpenAPI specification. It
        is deep-copied first and never mutated.

    Returns:
      A dictionary with invalid schema types removed or sanitized.
    """
    # Schema keywords whose value is instance data rather than a sub-schema.
    data_keywords = frozenset(
        {"const", "default", "enum", "example", "examples"}
    )
    # Schema keywords whose value maps arbitrary names to sub-schemas.
    schema_map_keywords = frozenset({
        "$defs",
        "definitions",
        "dependentSchemas",
        "patternProperties",
        "properties",
    })

    def sanitize_type_field(schema_dict: Dict[str, Any]) -> None:
        """Normalizes or drops ``schema_dict['type']`` in place."""
        if "type" not in schema_dict:
            return

        type_value = schema_dict["type"]
        if isinstance(type_value, str):
            normalized_type = type_value.lower()
            if normalized_type in _VALID_SCHEMA_TYPES:
                schema_dict["type"] = normalized_type
            else:
                del schema_dict["type"]
            return

        if isinstance(type_value, list):
            valid_types: List[str] = []
            for entry in type_value:
                if not isinstance(entry, str):
                    continue
                normalized_entry = entry.lower()
                if (
                    normalized_entry in _VALID_SCHEMA_TYPES
                    and normalized_entry not in valid_types
                ):
                    valid_types.append(normalized_entry)
            if valid_types:
                schema_dict["type"] = valid_types
            else:
                del schema_dict["type"]
        # Any other value kind (notably a dict, which is what a property
        # literally named "type" looks like inside a name map) is kept.

    def sanitize_recursive(
        obj: Any, *, in_schema: bool, is_name_map: bool = False
    ) -> Any:
        """Walks ``obj``; ``is_name_map`` marks a name-to-schema mapping."""
        if isinstance(obj, dict):
            if in_schema:
                sanitize_type_field(obj)
            # Iterate over a snapshot so assigning into ``obj`` is safe.
            for key, value in list(obj.items()):
                is_keyword = in_schema and not is_name_map
                if is_keyword and key in data_keywords:
                    # Instance data: leave exactly as the spec author wrote it.
                    continue
                obj[key] = sanitize_recursive(
                    value,
                    in_schema=in_schema or key in _SCHEMA_CONTAINER_KEYS,
                    # Keys of a name map are names, so a child is a name map
                    # only when the current dict is not one itself.
                    is_name_map=not is_name_map
                    and (
                        key == "schemas"
                        or (in_schema and key in schema_map_keywords)
                    ),
                )
            return obj
        if isinstance(obj, list):
            return [
                sanitize_recursive(item, in_schema=in_schema) for item in obj
            ]
        return obj

    # Work on a deep copy so the caller's spec is never modified.
    return sanitize_recursive(copy.deepcopy(openapi_spec), in_schema=False)
def _collect_operations(
self, openapi_spec: Dict[str, Any]
) -> List[ParsedOperation]:
@@ -681,3 +681,186 @@ def test_parse_spec_with_path_level_parameters(openapi_spec_generator):
assert local_param is not None
assert local_param.param_location == "header"
assert local_param.type_value is int
def test_parse_spec_with_invalid_type_any(openapi_spec_generator):
    """Parsing succeeds when a response schema declares type='Any'.

    'Any' is not a standard schema type (external APIs such as Google
    Integration Connectors may emit it). The parser is expected to sanitize
    it so that the single operation is still returned.
    """
    any_typed_schema = {"type": "Any"}
    success_response = {
        "description": "Success",
        "content": {"application/json": {"schema": any_typed_schema}},
    }
    spec = {
        "openapi": "3.1.0",
        "info": {"title": "API with Any type", "version": "1.0.0"},
        "paths": {
            "/test": {
                "get": {
                    "operationId": "testAnyType",
                    "responses": {"200": success_response},
                }
            }
        },
    }

    # Must complete without raising a ValidationError.
    operations = openapi_spec_generator.parse(spec)

    assert len(operations) == 1
    assert operations[0].name == "test_any_type"
def test_parse_spec_with_nested_invalid_types(openapi_spec_generator):
    """Invalid types nested inside request-body properties are sanitized."""
    nested_obj_schema = {
        "type": "object",
        "properties": {"deeply_invalid": {"type": "CustomType"}},
    }
    body_schema = {
        "type": "object",
        "properties": {
            "valid_prop": {"type": "string"},
            "invalid_prop": {"type": "Unknown"},
            "nested_obj": nested_obj_schema,
        },
    }
    spec = {
        "openapi": "3.1.0",
        "info": {"title": "Nested Invalid Types API", "version": "1.0.0"},
        "paths": {
            "/test": {
                "post": {
                    "operationId": "testNestedInvalid",
                    "requestBody": {
                        "content": {
                            "application/json": {"schema": body_schema}
                        }
                    },
                    "responses": {"200": {"description": "OK"}},
                }
            }
        },
    }

    # Must complete without raising a ValidationError.
    operations = openapi_spec_generator.parse(spec)
    assert len(operations) == 1

    # Every top-level body property, valid or not, still becomes a parameter.
    operation = operations[0]
    seen_names = [param.original_name for param in operation.parameters]
    for expected_name in ("valid_prop", "invalid_prop", "nested_obj"):
        assert expected_name in seen_names
def test_parse_spec_with_type_list_containing_invalid(openapi_spec_generator):
    """Parsing succeeds when a type array mixes valid and invalid entries."""
    mixed_type_schema = {"type": ["string", "Any", "null"]}
    success_response = {
        "description": "Success",
        "content": {"application/json": {"schema": mixed_type_schema}},
    }
    spec = {
        "openapi": "3.1.0",
        "info": {"title": "Type List API", "version": "1.0.0"},
        "paths": {
            "/test": {
                "get": {
                    "operationId": "testTypeList",
                    "responses": {"200": success_response},
                }
            }
        },
    }

    # Must complete without raising a ValidationError.
    operations = openapi_spec_generator.parse(spec)

    assert len(operations) == 1
def test_sanitize_schema_types_removes_invalid_types(openapi_spec_generator):
    """An invalid type is dropped while sibling keys and valid types stay."""
    spec = {
        "components": {
            "schemas": {
                "InvalidSchema": {"type": "Any", "description": "Invalid type"},
                "ValidSchema": {"type": "string", "description": "Valid type"},
            }
        }
    }

    result = openapi_spec_generator._sanitize_schema_types(spec)
    invalid_schema = result["components"]["schemas"]["InvalidSchema"]
    valid_schema = result["components"]["schemas"]["ValidSchema"]

    # Only the offending "type" key disappears; the description survives.
    assert "type" not in invalid_schema
    assert invalid_schema["description"] == "Invalid type"

    # A valid type passes through unchanged.
    assert valid_schema["type"] == "string"
def test_sanitize_schema_types_does_not_touch_security_schemes(
    openapi_spec_generator,
):
    """A security scheme's 'type' is kept even though it is not a schema type."""
    api_key_scheme = {
        "type": "apiKey",
        "in": "header",
        "name": "X-API-Key",
    }
    spec = {
        "components": {
            "schemas": {"InvalidSchema": {"type": "Any"}},
            "securitySchemes": {"api_key": api_key_scheme},
        }
    }

    components = openapi_spec_generator._sanitize_schema_types(spec)[
        "components"
    ]

    # The schema entry is sanitized ...
    assert "type" not in components["schemas"]["InvalidSchema"]
    # ... but "apiKey" lives outside any schema container and must survive.
    assert components["securitySchemes"]["api_key"]["type"] == "apiKey"
def test_sanitize_schema_types_filters_type_lists(openapi_spec_generator):
    """Invalid entries are filtered out of a type list; valid ones keep order."""
    mixed_types = ["string", "Any", "null", "Unknown"]
    spec = {"schema": {"type": mixed_types}}

    result = openapi_spec_generator._sanitize_schema_types(spec)

    # "Any" and "Unknown" are dropped; "string" and "null" remain in order.
    remaining_types = result["schema"]["type"]
    assert remaining_types == ["string", "null"]
def test_sanitize_schema_types_removes_all_invalid_list(openapi_spec_generator):
    """The 'type' key is removed when no entry of its list is valid."""
    only_invalid_types = ["Any", "Unknown", "Custom"]
    spec = {"schema": {"type": only_invalid_types}}

    result = openapi_spec_generator._sanitize_schema_types(spec)

    # Nothing valid is left, so the key itself must be gone (not an empty list).
    sanitized_schema = result["schema"]
    assert "type" not in sanitized_schema