[fix] Fix model list retrieval

2026-05-20 11:37:26 -07:00 · 2025-03-25 11:36:12 +08:00
parent ccbf41e636
commit dcc871d145
3 changed files with 6 additions and 121 deletions
@@ -69,7 +69,8 @@ class ModelDispatcher:
                if model_config["type"] == "openai_proxy":
                    self.backends[model_name] = OpenAIProxyBackend(model_config)
                elif model_config["type"] in ("llm", "vlm"):
-                    while len(self.llm_models) >= 2:
+                    count = model_config["pool_size"]
+                    while len(self.llm_models) >= count:
                        oldest_model = self.llm_models.pop(0)
                        old_instance = self.backends.pop(oldest_model, None)
                        if old_instance:
@@ -307,7 +308,7 @@ async def create_translation(
@app.get("/v1/models")
 async def list_models():
    models_info = []
-    for model_name in _dispatcher.backends.keys():
+    for model_name in config.data["models"].keys():
        model_config = config.data["models"].get(model_name, {})
        models_info.append({
            "id": model_name,
@@ -20,7 +20,7 @@ class LlmClientBackend(BaseModelBackend):
        self._active_clients = {}
        self._pool_lock = asyncio.Lock()
        self.logger = logging.getLogger("api.llm")
-        self.MAX_CONTEXT_LENGTH = model_config.get("max_context_length", 500)
+        self.MAX_CONTEXT_LENGTH = model_config.get("max_context_length", 200)
        self.POOL_SIZE = model_config.get("pool_size", 2)
        self._inference_executor = ThreadPoolExecutor(max_workers=self.POOL_SIZE)
        self._active_tasks = weakref.WeakSet()
@@ -1,119 +1,3 @@
-# config.yaml
 server:
-  host: 0.0.0.0
-  port: 8000
-
-models:
-  llama2-7b:
-    type: llama.cpp
-
-  gpt-3.5-turbo-proxy:
-    type: openai_proxy
-    api_key: sk-
-    base_url: https://api.openai.com/v1
-    model: gpt-3.5-turbo
-
-  deepseek-r1:
-    type: openai_proxy
-    api_key: sk-
-    base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
-    model: deepseek-r1
-
-  qwen2.5-0.5B-p256-ax630c:
-    type: tcp_client
-    host: "192.168.20.56" 
-    port: 10001
-    model_name: "qwen2.5-0.5B-p256-ax630c"
-    object: "llm.setup"
-    pool_size: 2
-    max_context_length: 128
-    response_format: "llm.utf-8.stream"
-    input: "llm.utf-8"
-    memory_required: 560460
-    system_prompt: |
-      You are a helpful assistant. 
-
-  qwen2.5-1.5B-p256-ax630c:
-    type: tcp_client
-    host: "192.168.20.56"
-    port: 10001
-    model_name: "qwen2.5-1.5B-p256-ax630c"
-    object: "llm.setup"
-    pool_size: 1
-    max_context_length: 128
-    response_format: "llm.utf-8.stream"
-    input: "llm.utf-8"
-    memory_required: 1686216
-    system_prompt: |
-      You are a helpful assistant.
-
-  deepseek-r1-1.5B-p256-ax630c:
-    type: tcp_client
-    host: "192.168.20.56"
-    port: 10001
-    model_name: "deepseek-r1-1.5B-p256-ax630c"
-    object: "llm.setup"
-    pool_size: 1
-    max_context_length: 128
-    response_format: "llm.utf-8.stream"
-    input: "llm.utf-8"
-    memory_required: 1686552
-    system_prompt: |
-      You are a helpful assistant.
-
-  llama3.2-1B-p256-ax630c:
-    type: tcp_client
-    host: "192.168.20.56"
-    port: 10001
-    model_name: "llama3.2-1B-p256-ax630c"
-    object: "llm.setup"
-    pool_size: 2
-    max_context_length: 128
-    response_format: "llm.utf-8.stream"
-    input: "llm.utf-8"
-    memory_required: 1336288
-    system_prompt: |
-      You are a helpful assistant.
-
-  internvl2.5-1B-ax630c:
-    type: tcp_client
-    host: "192.168.20.56"
-    port: 10001
-    model_name: "internvl2.5-1B-ax630c"
-    object: "vlm.setup"
-    pool_size: 2
-    max_context_length: 256
-    response_format: "vlm.utf-8.stream"
-    input: "vlm.utf-8"
-    memory_required: 905356
-    system_prompt: |
-      You are a helpful assistant.
-
-  qwen-vl-plus:
-    type: vision_model
-    api_key: sk-
-    base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
-    model: qwen-vl-plus
-    max_image_size: 4194304
-    image_timeout: 20
-
-  melotts:
-    type: tts
-    host: "192.168.20.56"
-    port: 10001
-    model_name: "melotts_zh-cn"
-    object: "melotts.setup"
-    response_format: "wav.base64"
-    memory_required: 59764
-    input: "tts.utf-8"
-
-  whisper-tiny:
-    type: asr
-    host: "192.168.20.56"
-    port: 10001
-    model_name: "whisper-tiny"
-    object: "whisper.setup"
-    response_format: "asr.utf-8"
-    memory_required: 289132
-    language: "en"
-    input: "pcm.base64"
+  host: 127.0.0.1
+  port: 10001