diff --git a/api_server.py b/api_server.py
index 9a9cfc2..642c60d 100644
--- a/api_server.py
+++ b/api_server.py
@@ -69,7 +69,8 @@ class ModelDispatcher:
                 if model_config["type"] == "openai_proxy":
                     self.backends[model_name] = OpenAIProxyBackend(model_config)
                 elif model_config["type"] in ("llm", "vlm"):
-                    while len(self.llm_models) >= 2:
+                    count = model_config.get("pool_size", 2)  # fall back to 2 (the previous hard-coded cap) when pool_size is absent
+                    while len(self.llm_models) >= count:
                         oldest_model = self.llm_models.pop(0)
                         old_instance = self.backends.pop(oldest_model, None)
                         if old_instance:
@@ -307,7 +308,7 @@ async def create_translation(
 @app.get("/v1/models")
 async def list_models():
     models_info = []
-    for model_name in _dispatcher.backends.keys():
+    for model_name in config.data["models"].keys():
         model_config = config.data["models"].get(model_name, {})
         models_info.append({
             "id": model_name,
diff --git a/backend/llm_client_backend.py b/backend/llm_client_backend.py
index 3f7557c..847196a 100644
--- a/backend/llm_client_backend.py
+++ b/backend/llm_client_backend.py
@@ -20,7 +20,7 @@ class LlmClientBackend(BaseModelBackend):
         self._active_clients = {}
         self._pool_lock = asyncio.Lock()
         self.logger = logging.getLogger("api.llm")
-        self.MAX_CONTEXT_LENGTH = model_config.get("max_context_length", 500)
+        self.MAX_CONTEXT_LENGTH = model_config.get("max_context_length", 200)
         self.POOL_SIZE = model_config.get("pool_size", 2)
         self._inference_executor = ThreadPoolExecutor(max_workers=self.POOL_SIZE)
         self._active_tasks = weakref.WeakSet()
diff --git a/config/config.yaml b/config/config.yaml
index c1e8153..a7d5b83 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -1,119 +1,3 @@
-# config.yaml
 server:
-  host: 0.0.0.0
-  port: 8000
-
-models:
-  llama2-7b:
-    type: llama.cpp
-
-  gpt-3.5-turbo-proxy:
-    type: openai_proxy
-    api_key: sk-
-    base_url: https://api.openai.com/v1
-    model: gpt-3.5-turbo
-
-  deepseek-r1:
-    type: openai_proxy
-    api_key: sk-
-    base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
-    model: deepseek-r1
-
-  qwen2.5-0.5B-p256-ax630c:
-    type: tcp_client
-    host: "192.168.20.56" 
-    port: 10001
-    model_name: "qwen2.5-0.5B-p256-ax630c"
-    object: "llm.setup"
-    pool_size: 2
-    max_context_length: 128
-    response_format: "llm.utf-8.stream"
-    input: "llm.utf-8"
-    memory_required: 560460
-    system_prompt: |
-      You are a helpful assistant. 
-
-  qwen2.5-1.5B-p256-ax630c:
-    type: tcp_client
-    host: "192.168.20.56"
-    port: 10001
-    model_name: "qwen2.5-1.5B-p256-ax630c"
-    object: "llm.setup"
-    pool_size: 1
-    max_context_length: 128
-    response_format: "llm.utf-8.stream"
-    input: "llm.utf-8"
-    memory_required: 1686216
-    system_prompt: |
-      You are a helpful assistant.
-
-  deepseek-r1-1.5B-p256-ax630c:
-    type: tcp_client
-    host: "192.168.20.56"
-    port: 10001
-    model_name: "deepseek-r1-1.5B-p256-ax630c"
-    object: "llm.setup"
-    pool_size: 1
-    max_context_length: 128
-    response_format: "llm.utf-8.stream"
-    input: "llm.utf-8"
-    memory_required: 1686552
-    system_prompt: |
-      You are a helpful assistant.
-
-  llama3.2-1B-p256-ax630c:
-    type: tcp_client
-    host: "192.168.20.56"
-    port: 10001
-    model_name: "llama3.2-1B-p256-ax630c"
-    object: "llm.setup"
-    pool_size: 2
-    max_context_length: 128
-    response_format: "llm.utf-8.stream"
-    input: "llm.utf-8"
-    memory_required: 1336288
-    system_prompt: |
-      You are a helpful assistant.
-
-  internvl2.5-1B-ax630c:
-    type: tcp_client
-    host: "192.168.20.56"
-    port: 10001
-    model_name: "internvl2.5-1B-ax630c"
-    object: "vlm.setup"
-    pool_size: 2
-    max_context_length: 256
-    response_format: "vlm.utf-8.stream"
-    input: "vlm.utf-8"
-    memory_required: 905356
-    system_prompt: |
-      You are a helpful assistant.
-
-  qwen-vl-plus:
-    type: vision_model
-    api_key: sk-
-    base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
-    model: qwen-vl-plus
-    max_image_size: 4194304
-    image_timeout: 20
-
-  melotts:
-    type: tts
-    host: "192.168.20.56"
-    port: 10001
-    model_name: "melotts_zh-cn"
-    object: "melotts.setup"
-    response_format: "wav.base64"
-    memory_required: 59764
-    input: "tts.utf-8"
-
-  whisper-tiny:
-    type: asr
-    host: "192.168.20.56"
-    port: 10001
-    model_name: "whisper-tiny"
-    object: "whisper.setup"
-    response_format: "asr.utf-8"
-    memory_required: 289132
-    language: "en"
-    input: "pcm.base64"
\ No newline at end of file
+  host: 127.0.0.1
+  port: 10001
\ No newline at end of file