From 8fc4455b579d86b253df6effcd771c8fc111a32a Mon Sep 17 00:00:00 2001
From: Dallas98 <40557804+Dallas98@users.noreply.github.com>
Date: Mon, 22 Dec 2025 09:29:00 +0800
Subject: [PATCH] =?UTF-8?q?=E9=85=8D=E7=BD=AE=E6=96=87=E4=BB=B6=E6=9B=B4?=
 =?UTF-8?q?=E6=94=B9=20(#186)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(generation_service): add image URL extraction and random QA generation logic

* fix(generation_service): increase batch size from 20 to 100 for improved chunk processing

* fix(generation_service): raise question semaphore concurrency limit from 10 to 20
---
 .../app/module/generation/service/generation_service.py       | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/runtime/datamate-python/app/module/generation/service/generation_service.py b/runtime/datamate-python/app/module/generation/service/generation_service.py
index 9178c9e..2a085fd 100644
--- a/runtime/datamate-python/app/module/generation/service/generation_service.py
+++ b/runtime/datamate-python/app/module/generation/service/generation_service.py
@@ -49,7 +49,7 @@ class GenerationService:
     def __init__(self, db: AsyncSession):
         self.db = db
         # 全局并发信号量：保证任意时刻最多 10 次模型调用
-        self.question_semaphore = asyncio.Semaphore(10)
+        self.question_semaphore = asyncio.Semaphore(20)
         self.answer_semaphore = asyncio.Semaphore(100)
 
     async def process_task(self, task_id: str):
@@ -175,7 +175,7 @@ class GenerationService:
         answer_chat = get_chat_client(answer_model)
 
         # 分批次从 DB 读取并处理 chunk
-        batch_size = 20
+        batch_size = 100
         current_index = 1
 
         while current_index <= total_chunks: