You've already forked ModuleLLM-OpenAI-Plugin
mirror of
https://github.com/m5stack/ModuleLLM-OpenAI-Plugin.git
synced 2026-05-20 11:37:26 -07:00
[fix] Fix model list retrieval
This commit is contained in:
+3
-2
@@ -69,7 +69,8 @@ class ModelDispatcher:
|
||||
if model_config["type"] == "openai_proxy":
|
||||
self.backends[model_name] = OpenAIProxyBackend(model_config)
|
||||
elif model_config["type"] in ("llm", "vlm"):
|
||||
-while len(self.llm_models) >= 2:
|
||||
+count = model_config["pool_size"]
|
||||
+while len(self.llm_models) >= count:
|
||||
oldest_model = self.llm_models.pop(0)
|
||||
old_instance = self.backends.pop(oldest_model, None)
|
||||
if old_instance:
|
||||
@@ -307,7 +308,7 @@ async def create_translation(
|
||||
@app.get("/v1/models")
|
||||
async def list_models():
|
||||
models_info = []
|
||||
-for model_name in _dispatcher.backends.keys():
|
||||
+for model_name in config.data["models"].keys():
|
||||
model_config = config.data["models"].get(model_name, {})
|
||||
models_info.append({
|
||||
"id": model_name,
|
||||
|
||||
@@ -20,7 +20,7 @@ class LlmClientBackend(BaseModelBackend):
|
||||
self._active_clients = {}
|
||||
self._pool_lock = asyncio.Lock()
|
||||
self.logger = logging.getLogger("api.llm")
|
||||
-self.MAX_CONTEXT_LENGTH = model_config.get("max_context_length", 500)
|
||||
+self.MAX_CONTEXT_LENGTH = model_config.get("max_context_length", 200)
|
||||
self.POOL_SIZE = model_config.get("pool_size", 2)
|
||||
self._inference_executor = ThreadPoolExecutor(max_workers=self.POOL_SIZE)
|
||||
self._active_tasks = weakref.WeakSet()
|
||||
|
||||
+2
-118
@@ -1,119 +1,3 @@
|
||||
# config.yaml
|
||||
server:
|
||||
host: 0.0.0.0
|
||||
port: 8000
|
||||
|
||||
models:
|
||||
llama2-7b:
|
||||
type: llama.cpp
|
||||
|
||||
gpt-3.5-turbo-proxy:
|
||||
type: openai_proxy
|
||||
api_key: sk-
|
||||
base_url: https://api.openai.com/v1
|
||||
model: gpt-3.5-turbo
|
||||
|
||||
deepseek-r1:
|
||||
type: openai_proxy
|
||||
api_key: sk-
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
model: deepseek-r1
|
||||
|
||||
qwen2.5-0.5B-p256-ax630c:
|
||||
type: tcp_client
|
||||
host: "192.168.20.56"
|
||||
port: 10001
|
||||
model_name: "qwen2.5-0.5B-p256-ax630c"
|
||||
object: "llm.setup"
|
||||
pool_size: 2
|
||||
max_context_length: 128
|
||||
response_format: "llm.utf-8.stream"
|
||||
input: "llm.utf-8"
|
||||
memory_required: 560460
|
||||
system_prompt: |
|
||||
You are a helpful assistant.
|
||||
|
||||
qwen2.5-1.5B-p256-ax630c:
|
||||
type: tcp_client
|
||||
host: "192.168.20.56"
|
||||
port: 10001
|
||||
model_name: "qwen2.5-1.5B-p256-ax630c"
|
||||
object: "llm.setup"
|
||||
pool_size: 1
|
||||
max_context_length: 128
|
||||
response_format: "llm.utf-8.stream"
|
||||
input: "llm.utf-8"
|
||||
memory_required: 1686216
|
||||
system_prompt: |
|
||||
You are a helpful assistant.
|
||||
|
||||
deepseek-r1-1.5B-p256-ax630c:
|
||||
type: tcp_client
|
||||
host: "192.168.20.56"
|
||||
port: 10001
|
||||
model_name: "deepseek-r1-1.5B-p256-ax630c"
|
||||
object: "llm.setup"
|
||||
pool_size: 1
|
||||
max_context_length: 128
|
||||
response_format: "llm.utf-8.stream"
|
||||
input: "llm.utf-8"
|
||||
memory_required: 1686552
|
||||
system_prompt: |
|
||||
You are a helpful assistant.
|
||||
|
||||
llama3.2-1B-p256-ax630c:
|
||||
type: tcp_client
|
||||
host: "192.168.20.56"
|
||||
port: 10001
|
||||
model_name: "llama3.2-1B-p256-ax630c"
|
||||
object: "llm.setup"
|
||||
pool_size: 2
|
||||
max_context_length: 128
|
||||
response_format: "llm.utf-8.stream"
|
||||
input: "llm.utf-8"
|
||||
memory_required: 1336288
|
||||
system_prompt: |
|
||||
You are a helpful assistant.
|
||||
|
||||
internvl2.5-1B-ax630c:
|
||||
type: tcp_client
|
||||
host: "192.168.20.56"
|
||||
port: 10001
|
||||
model_name: "internvl2.5-1B-ax630c"
|
||||
object: "vlm.setup"
|
||||
pool_size: 2
|
||||
max_context_length: 256
|
||||
response_format: "vlm.utf-8.stream"
|
||||
input: "vlm.utf-8"
|
||||
memory_required: 905356
|
||||
system_prompt: |
|
||||
You are a helpful assistant.
|
||||
|
||||
qwen-vl-plus:
|
||||
type: vision_model
|
||||
api_key: sk-
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
model: qwen-vl-plus
|
||||
max_image_size: 4194304
|
||||
image_timeout: 20
|
||||
|
||||
melotts:
|
||||
type: tts
|
||||
host: "192.168.20.56"
|
||||
port: 10001
|
||||
model_name: "melotts_zh-cn"
|
||||
object: "melotts.setup"
|
||||
response_format: "wav.base64"
|
||||
memory_required: 59764
|
||||
input: "tts.utf-8"
|
||||
|
||||
whisper-tiny:
|
||||
type: asr
|
||||
host: "192.168.20.56"
|
||||
port: 10001
|
||||
model_name: "whisper-tiny"
|
||||
object: "whisper.setup"
|
||||
response_format: "asr.utf-8"
|
||||
memory_required: 289132
|
||||
language: "en"
|
||||
input: "pcm.base64"
|
||||
+host: 127.0.0.1
|
||||
+port: 10001
|
||||
Reference in New Issue
Block a user