[fix] Fix model list retrieval

This commit is contained in:
LittleMouse
2025-03-25 11:36:12 +08:00
parent ccbf41e636
commit dcc871d145
3 changed files with 6 additions and 121 deletions
+3 -2
View File
@@ -69,7 +69,8 @@ class ModelDispatcher:
if model_config["type"] == "openai_proxy":
self.backends[model_name] = OpenAIProxyBackend(model_config)
elif model_config["type"] in ("llm", "vlm"):
-while len(self.llm_models) >= 2:
+count = model_config["pool_size"]
+while len(self.llm_models) >= count:
oldest_model = self.llm_models.pop(0)
old_instance = self.backends.pop(oldest_model, None)
if old_instance:
@@ -307,7 +308,7 @@ async def create_translation(
@app.get("/v1/models")
async def list_models():
models_info = []
-for model_name in _dispatcher.backends.keys():
+for model_name in config.data["models"].keys():
model_config = config.data["models"].get(model_name, {})
models_info.append({
"id": model_name,
+1 -1
View File
@@ -20,7 +20,7 @@ class LlmClientBackend(BaseModelBackend):
self._active_clients = {}
self._pool_lock = asyncio.Lock()
self.logger = logging.getLogger("api.llm")
-self.MAX_CONTEXT_LENGTH = model_config.get("max_context_length", 500)
+self.MAX_CONTEXT_LENGTH = model_config.get("max_context_length", 200)
self.POOL_SIZE = model_config.get("pool_size", 2)
self._inference_executor = ThreadPoolExecutor(max_workers=self.POOL_SIZE)
self._active_tasks = weakref.WeakSet()
+2 -118
View File
@@ -1,119 +1,3 @@
# config.yaml
server:
host: 0.0.0.0
port: 8000
models:
llama2-7b:
type: llama.cpp
gpt-3.5-turbo-proxy:
type: openai_proxy
api_key: sk-
base_url: https://api.openai.com/v1
model: gpt-3.5-turbo
deepseek-r1:
type: openai_proxy
api_key: sk-
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
model: deepseek-r1
qwen2.5-0.5B-p256-ax630c:
type: tcp_client
host: "192.168.20.56"
port: 10001
model_name: "qwen2.5-0.5B-p256-ax630c"
object: "llm.setup"
pool_size: 2
max_context_length: 128
response_format: "llm.utf-8.stream"
input: "llm.utf-8"
memory_required: 560460
system_prompt: |
You are a helpful assistant.
qwen2.5-1.5B-p256-ax630c:
type: tcp_client
host: "192.168.20.56"
port: 10001
model_name: "qwen2.5-1.5B-p256-ax630c"
object: "llm.setup"
pool_size: 1
max_context_length: 128
response_format: "llm.utf-8.stream"
input: "llm.utf-8"
memory_required: 1686216
system_prompt: |
You are a helpful assistant.
deepseek-r1-1.5B-p256-ax630c:
type: tcp_client
host: "192.168.20.56"
port: 10001
model_name: "deepseek-r1-1.5B-p256-ax630c"
object: "llm.setup"
pool_size: 1
max_context_length: 128
response_format: "llm.utf-8.stream"
input: "llm.utf-8"
memory_required: 1686552
system_prompt: |
You are a helpful assistant.
llama3.2-1B-p256-ax630c:
type: tcp_client
host: "192.168.20.56"
port: 10001
model_name: "llama3.2-1B-p256-ax630c"
object: "llm.setup"
pool_size: 2
max_context_length: 128
response_format: "llm.utf-8.stream"
input: "llm.utf-8"
memory_required: 1336288
system_prompt: |
You are a helpful assistant.
internvl2.5-1B-ax630c:
type: tcp_client
host: "192.168.20.56"
port: 10001
model_name: "internvl2.5-1B-ax630c"
object: "vlm.setup"
pool_size: 2
max_context_length: 256
response_format: "vlm.utf-8.stream"
input: "vlm.utf-8"
memory_required: 905356
system_prompt: |
You are a helpful assistant.
qwen-vl-plus:
type: vision_model
api_key: sk-
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
model: qwen-vl-plus
max_image_size: 4194304
image_timeout: 20
melotts:
type: tts
host: "192.168.20.56"
port: 10001
model_name: "melotts_zh-cn"
object: "melotts.setup"
response_format: "wav.base64"
memory_required: 59764
input: "tts.utf-8"
whisper-tiny:
type: asr
host: "192.168.20.56"
port: 10001
model_name: "whisper-tiny"
object: "whisper.setup"
response_format: "asr.utf-8"
memory_required: 289132
language: "en"
input: "pcm.base64"
host: 127.0.0.1
port: 10001