diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bac87f9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,28 @@ +*.pyc +__pycache__/ +*.pyo +*.pyd + +.DS_Store +.idea/ +.vscode/ +*.swp +*.swo + +.env +.venv/ +venv/ +ENV/ + +images/ +json/ +logs/ +legacy/ +test/ +txt/ +*.log +*.sqlite +*.db + +.coverage +htmlcov/ \ No newline at end of file diff --git a/api_server.py b/api_server.py index cd933e1..5dc2a04 100644 --- a/api_server.py +++ b/api_server.py @@ -98,13 +98,13 @@ async def chat_completions(request: Request, body: ChatCompletionRequest): print(f"Sending chunk: {json_chunk}") yield f"data: {json_chunk}\n\n" except asyncio.CancelledError: - logger.warning("客户端提前断开连接,正在终止推理...") + logger.warning("Client disconnected early, terminating inference...") if backend and isinstance(backend, LlmClientBackend): for task in backend._active_tasks: task.cancel() raise finally: - logger.debug("流连接已关闭") + logger.debug("Stream connection closed") return StreamingResponse( format_stream(), @@ -142,7 +142,7 @@ async def create_completion(request: Request, body: CompletionRequest): async def convert_stream(): async for chunk in chunk_generator: - # 转换格式后需要序列化为JSON字符串 + # Convert format and serialize to JSON string completion_chunk = { "id": chunk.get("id", f"cmpl-{uuid.uuid4()}"), "object": "text_completion.chunk", @@ -155,10 +155,8 @@ async def create_completion(request: Request, body: CompletionRequest): "finish_reason": chunk["choices"][0].get("finish_reason") }] } - # 添加SSE格式包装 yield f"data: {json.dumps(completion_chunk)}\n\n" - # 添加流结束标记 yield "data: [DONE]\n\n" return StreamingResponse( diff --git a/backend/base_model_backend.py b/backend/base_model_backend.py index 8dd114a..e32d533 100644 --- a/backend/base_model_backend.py +++ b/backend/base_model_backend.py @@ -1,6 +1,6 @@ from pydantic import BaseModel from typing import Optional, List, Union -from .chat_schemas import ChatCompletionRequest # Note: You'll need to move the request models to a schemas.py file +from .chat_schemas import ChatCompletionRequest class BaseModelBackend: def __init__(self, model_config): diff --git a/backend/llm_client_backend.py b/backend/llm_client_backend.py index cb5b16a..deb1bca 100644 --- a/backend/llm_client_backend.py +++ b/backend/llm_client_backend.py @@ -107,7 +107,7 @@ class LlmClientBackend(BaseModelBackend): loop = asyncio.get_event_loop() for i, image_data in enumerate(base64_images): message = client.send_jpeg(image_data, object_type="vlm.jpeg.base64") - print(f"发送第 {i+1} 张JPEG数据: {message[:20]}...") + print(f"Sending JPEG data #{i+1}: {message[:20]}...") sync_gen = client.inference_stream( query, @@ -170,10 +170,10 @@ class LlmClientBackend(BaseModelBackend): if response.status == 200: image_data = await response.read() return base64.b64encode(image_data).decode('utf-8') - self.logger.error(f"图片下载失败,状态码:{response.status}") + self.logger.error(f"Image download failed, status code: {response.status}") return None except Exception as e: - self.logger.error(f"图片下载异常:{str(e)}") + self.logger.error(f"Image download error: {str(e)}") return None async def generate(self, request: ChatCompletionRequest): diff --git a/backend/vision_model_backend.py b/backend/vision_model_backend.py index a28342c..151e42a 100644 --- a/backend/vision_model_backend.py +++ b/backend/vision_model_backend.py @@ -21,7 +21,7 @@ class VisionModelBackend(BaseModelBackend): "image_url": {"url": url} } - # 下载外部图片并转换为base64 + # Download external image and convert to base64 base64_str = await self.download_image( url, max_size=self.MAX_IMAGE_SIZE, @@ -30,7 +30,7 @@ class VisionModelBackend(BaseModelBackend): if not base64_str: raise HTTPException( status_code=400, - detail=f"无法加载图片: {url}" + detail=f"Failed to load image: {url}" ) return { @@ -94,12 +94,12 @@ class VisionModelBackend(BaseModelBackend): if request.stream: async def stream_wrapper(): async for chunk in response: - # 统一错误处理 + # Unified error handling if isinstance(chunk, dict) and "error" in chunk: yield chunk continue - # 转换为兼容格式 + # Convert to compatible format yield { "id": f"chatcmpl-{uuid.uuid4()}", "object": "chat.completion.chunk", @@ -117,7 +117,7 @@ class VisionModelBackend(BaseModelBackend): yield {"choices": [{"delta": {}, "finish_reason": "stop"}]} return stream_wrapper() - # 非流式响应添加usage信息 + # Add usage info for non-stream response return { "id": f"chatcmpl-{uuid.uuid4()}", "object": "chat.completion", diff --git a/client/llm_client.py b/client/llm_client.py index b5a6f54..60d4107 100644 --- a/client/llm_client.py +++ b/client/llm_client.py @@ -32,7 +32,7 @@ class LLMClient: try: self.sock.connect((self.host, self.port)) except ConnectionRefusedError as e: - raise RuntimeError(f"无法连接到 {self.host}:{self.port}") from e + raise RuntimeError(f"Failed to connect to {self.host}:{self.port}") from e def close(self): if self.sock: @@ -120,7 +120,7 @@ if __name__ == "__main__": }) print("Setup response:", setup_response) - for chunk in client.inference_stream("给我讲一个故事"): + for chunk in client.inference_stream("Tell me a story"): print("Received chunk:", chunk) client.stop_inference()