fix: prompt token may be None in streaming mode

Merge https://github.com/google/adk-python/pull/3462

**Please ensure you have read the [contribution guide](https://github.com/google/adk-python/blob/main/CONTRIBUTING.md) before creating a pull request.**

### Link to Issue or Description of Change

**1. Link to an existing issue (if applicable):**

- Closes: #_issue_number_
- Related: #_issue_number_

**2. Or, if no issue exists, describe the change:**

**Problem:**
When using ADK in streaming mode, `usage_metadata.prompt_token_count` may be `None`, which causes the following warning to be logged:
```
Invalid type NoneType for attribute 'gen_ai.usage.input_tokens' value. Expected one of ['bool', 'str', 'bytes', 'int', 'float'] or a sequence of those types
```
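
For context, the warning comes from the OpenTelemetry SDK's attribute validation. A minimal sketch that reproduces it outside of ADK (assuming an `opentelemetry-sdk` `TracerProvider` is configured; the span name and variable here are illustrative only):

```python
# Minimal reproduction sketch (not part of this change).
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider

trace.set_tracer_provider(TracerProvider())
tracer = trace.get_tracer(__name__)

with tracer.start_as_current_span('call_llm') as span:
    prompt_token_count = None  # what streaming usage_metadata can report
    # The SDK rejects None attribute values and logs the warning quoted above.
    span.set_attribute('gen_ai.usage.input_tokens', prompt_token_count)
```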

**Solution:**
Skip setting the span attribute when `prompt_token_count` is `None`.
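
Condensed, the guard looks like this (a sketch of the change; the exact context inside `trace_call_llm` is in the diff below):

```python
# Only record the input-token attribute when the count is present.
if llm_response.usage_metadata.prompt_token_count is not None:
    span.set_attribute(
        'gen_ai.usage.input_tokens',
        llm_response.usage_metadata.prompt_token_count,
    )
```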

**Unit Tests:**

- [x] All unit tests pass locally.

_Please include a summary of passed `pytest` results._

### Checklist

- [x] I have read the [CONTRIBUTING.md](https://github.com/google/adk-python/blob/main/CONTRIBUTING.md) document.
- [x] I have performed a self-review of my own code.
- [x] I have commented my code, particularly in hard-to-understand areas.
- [x] I have added tests that prove my fix is effective or that my feature works.
- [x] New and existing unit tests pass locally with my changes.
- [x] I have manually tested my changes end-to-end.
- [x] Any dependent changes have been merged and published in downstream modules.

COPYBARA_INTEGRATE_REVIEW=https://github.com/google/adk-python/pull/3462 from wsa-2002:prompt-token-count-may-be-none-in-streaming-mode 94666862f70ed2577d5c55485e67f6da36a57bc6
PiperOrigin-RevId: 867693355
Author: Benson Wang
Date: 2026-02-09 11:23:49 -08:00
Committed by: Copybara-Service
Parent: 43c437e38b
Commit: 32ee07df01
2 changed files with 49 additions and 4 deletions
+5 -4
@@ -327,10 +327,11 @@ def trace_call_llm(
     span.set_attribute('gcp.vertex.agent.llm_response', '{}')
   if llm_response.usage_metadata is not None:
-    span.set_attribute(
-        'gen_ai.usage.input_tokens',
-        llm_response.usage_metadata.prompt_token_count,
-    )
+    if llm_response.usage_metadata.prompt_token_count is not None:
+      span.set_attribute(
+          'gen_ai.usage.input_tokens',
+          llm_response.usage_metadata.prompt_token_count,
+      )
     if llm_response.usage_metadata.candidates_token_count is not None:
       span.set_attribute(
           'gen_ai.usage.output_tokens',
+44
@@ -167,6 +167,50 @@ async def test_trace_call_llm(monkeypatch, mock_span_fixture):
   )
 
 
+@pytest.mark.asyncio
+async def test_trace_call_llm_with_no_usage_metadata(
+    monkeypatch, mock_span_fixture
+):
+  """Test trace_call_llm handles usage metadata with None token counts."""
+  monkeypatch.setattr(
+      'opentelemetry.trace.get_current_span', lambda: mock_span_fixture
+  )
+  agent = LlmAgent(name='test_agent')
+  invocation_context = await _create_invocation_context(agent)
+  llm_request = LlmRequest(
+      model='gemini-pro',
+      contents=[
+          types.Content(
+              role='user',
+              parts=[types.Part(text='Hello, how are you?')],
+          ),
+      ],
+      config=types.GenerateContentConfig(
+          top_p=0.95,
+          max_output_tokens=1024,
+      ),
+  )
+  llm_response = LlmResponse(
+      turn_complete=True,
+      finish_reason=types.FinishReason.STOP,
+      usage_metadata=types.GenerateContentResponseUsageMetadata(),
+  )
+
+  trace_call_llm(invocation_context, 'test_event_id', llm_request, llm_response)
+
+  expected_calls = [
+      mock.call('gen_ai.system', 'gcp.vertex.agent'),
+      mock.call('gen_ai.request.top_p', 0.95),
+      mock.call('gen_ai.request.max_tokens', 1024),
+      mock.call('gcp.vertex.agent.llm_response', mock.ANY),
+      mock.call('gen_ai.response.finish_reasons', ['stop']),
+  ]
+  assert mock_span_fixture.set_attribute.call_count == 10
+  mock_span_fixture.set_attribute.assert_has_calls(
+      expected_calls, any_order=True
+  )
+
+
 @pytest.mark.asyncio
 async def test_trace_call_llm_with_binary_content(
     monkeypatch, mock_span_fixture