[perf] Standardize code comments

This commit is contained in:
LittleMouse
2025-02-20 17:40:30 +08:00
parent 39f3c055ff
commit 42d9e41bf7
6 changed files with 42 additions and 16 deletions
+28
View File
@@ -0,0 +1,28 @@
*.pyc
__pycache__/
*.pyo
*.pyd
.DS_Store
.idea/
.vscode/
*.swp
*.swo
.env
.venv/
venv/
ENV/
images/
json/
logs/
legacy/
test/
txt/
*.log
*.sqlite
*.db
.coverage
htmlcov/
+3 -5
View File
@@ -98,13 +98,13 @@ async def chat_completions(request: Request, body: ChatCompletionRequest):
print(f"Sending chunk: {json_chunk}")
yield f"data: {json_chunk}\n\n"
except asyncio.CancelledError:
logger.warning("客户端提前断开连接,正在终止推理...")
logger.warning("Client disconnected early, terminating inference...")
if backend and isinstance(backend, LlmClientBackend):
for task in backend._active_tasks:
task.cancel()
raise
finally:
logger.debug("流连接已关闭")
logger.debug("Stream connection closed")
return StreamingResponse(
format_stream(),
@@ -142,7 +142,7 @@ async def create_completion(request: Request, body: CompletionRequest):
async def convert_stream():
async for chunk in chunk_generator:
# 转换格式后需要序列化为JSON字符串
# Convert format and serialize to JSON string
completion_chunk = {
"id": chunk.get("id", f"cmpl-{uuid.uuid4()}"),
"object": "text_completion.chunk",
@@ -155,10 +155,8 @@ async def create_completion(request: Request, body: CompletionRequest):
"finish_reason": chunk["choices"][0].get("finish_reason")
}]
}
# 添加SSE格式包装
yield f"data: {json.dumps(completion_chunk)}\n\n"
# 添加流结束标记
yield "data: [DONE]\n\n"
return StreamingResponse(
+1 -1
View File
@@ -1,6 +1,6 @@
from pydantic import BaseModel
from typing import Optional, List, Union
from .chat_schemas import ChatCompletionRequest # Note: You'll need to move the request models to a schemas.py file
from .chat_schemas import ChatCompletionRequest
class BaseModelBackend:
def __init__(self, model_config):
+3 -3
View File
@@ -107,7 +107,7 @@ class LlmClientBackend(BaseModelBackend):
loop = asyncio.get_event_loop()
for i, image_data in enumerate(base64_images):
message = client.send_jpeg(image_data, object_type="vlm.jpeg.base64")
print(f"发送第 {i+1} 张JPEG数据: {message[:20]}...")
print(f"Sending JPEG data #{i+1}: {message[:20]}...")
sync_gen = client.inference_stream(
query,
@@ -170,10 +170,10 @@ class LlmClientBackend(BaseModelBackend):
if response.status == 200:
image_data = await response.read()
return base64.b64encode(image_data).decode('utf-8')
self.logger.error(f"图片下载失败,状态码:{response.status}")
self.logger.error(f"Image download failed, status code: {response.status}")
return None
except Exception as e:
self.logger.error(f"图片下载异常:{str(e)}")
self.logger.error(f"Image download error: {str(e)}")
return None
async def generate(self, request: ChatCompletionRequest):
+5 -5
View File
@@ -21,7 +21,7 @@ class VisionModelBackend(BaseModelBackend):
"image_url": {"url": url}
}
# 下载外部图片并转换为base64
# Download external image and convert to base64
base64_str = await self.download_image(
url,
max_size=self.MAX_IMAGE_SIZE,
@@ -30,7 +30,7 @@ class VisionModelBackend(BaseModelBackend):
if not base64_str:
raise HTTPException(
status_code=400,
detail=f"无法加载图片: {url}"
detail=f"Failed to load image: {url}"
)
return {
@@ -94,12 +94,12 @@ class VisionModelBackend(BaseModelBackend):
if request.stream:
async def stream_wrapper():
async for chunk in response:
# 统一错误处理
# Unified error handling
if isinstance(chunk, dict) and "error" in chunk:
yield chunk
continue
# 转换为兼容格式
# Convert to compatible format
yield {
"id": f"chatcmpl-{uuid.uuid4()}",
"object": "chat.completion.chunk",
@@ -117,7 +117,7 @@ class VisionModelBackend(BaseModelBackend):
yield {"choices": [{"delta": {}, "finish_reason": "stop"}]}
return stream_wrapper()
# 非流式响应添加usage信息
# Add usage info for non-stream response
return {
"id": f"chatcmpl-{uuid.uuid4()}",
"object": "chat.completion",
+2 -2
View File
@@ -32,7 +32,7 @@ class LLMClient:
try:
self.sock.connect((self.host, self.port))
except ConnectionRefusedError as e:
raise RuntimeError(f"无法连接到 {self.host}:{self.port}") from e
raise RuntimeError(f"Failed to connect to {self.host}:{self.port}") from e
def close(self):
if self.sock:
@@ -120,7 +120,7 @@ if __name__ == "__main__":
})
print("Setup response:", setup_response)
for chunk in client.inference_stream("给我讲一个故事"):
for chunk in client.inference_stream("Tell me a story"):
print("Received chunk:", chunk)
client.stop_inference()