You've already forked FrameTour-RenderWorker
feat(gpu): 添加多显卡调度支持
- 新增 GPUDevice 数据类定义 GPU 设备信息
- 扩展 WorkerConfig 添加 gpu_devices 配置项
- 从环境变量 GPU_DEVICES 读取多显卡设备配置
- 实现 GPUScheduler 提供轮询调度功能
- 修改 FFmpeg 参数生成支持设备指定
- 添加线程本地存储管理当前 GPU 设备
- 更新任务执行器集成 GPU 设备分配
- 实现 GPU 设备自动检测和验证功能
- 添加相关日志记录和状态监控
This commit is contained in:
@@ -15,6 +15,7 @@ from domain.result import TaskResult, ErrorCode
|
||||
from domain.config import WorkerConfig
|
||||
from core.handler import TaskHandler
|
||||
from services.lease_service import LeaseService
|
||||
from services.gpu_scheduler import GPUScheduler
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from services.api_client import APIClientV2
|
||||
@@ -60,6 +61,12 @@ class TaskExecutor:
|
||||
# 线程安全锁
|
||||
self.lock = threading.Lock()
|
||||
|
||||
# GPU 调度器(如果启用硬件加速)
|
||||
self.gpu_scheduler = GPUScheduler(config)
|
||||
|
||||
if self.gpu_scheduler.enabled:
|
||||
logger.info(f"GPU scheduler enabled with {self.gpu_scheduler.device_count} device(s)")
|
||||
|
||||
# 注册处理器
|
||||
self._register_handlers()
|
||||
|
||||
@@ -164,15 +171,27 @@ class TaskExecutor:
|
||||
)
|
||||
lease_service.start()
|
||||
|
||||
# 获取 GPU 设备
|
||||
device_index = None
|
||||
if self.gpu_scheduler.enabled:
|
||||
device_index = self.gpu_scheduler.acquire()
|
||||
if device_index is not None:
|
||||
logger.info(f"[task:{task_id}] Assigned to GPU device {device_index}")
|
||||
|
||||
# 获取处理器(需要在设置 GPU 设备前获取)
|
||||
handler = self.handlers.get(task.task_type)
|
||||
|
||||
try:
|
||||
# 报告任务开始
|
||||
self.api_client.report_start(task_id)
|
||||
|
||||
# 获取处理器
|
||||
handler = self.handlers.get(task.task_type)
|
||||
if not handler:
|
||||
raise ValueError(f"No handler for task type: {task.task_type}")
|
||||
|
||||
# 设置 GPU 设备(线程本地存储)
|
||||
if device_index is not None:
|
||||
handler.set_gpu_device(device_index)
|
||||
|
||||
# 执行前钩子
|
||||
handler.before_handle(task)
|
||||
|
||||
@@ -196,6 +215,14 @@ class TaskExecutor:
|
||||
self.api_client.report_fail(task_id, 'E_UNKNOWN', str(e))
|
||||
|
||||
finally:
|
||||
# 清除 GPU 设备设置
|
||||
if handler:
|
||||
handler.clear_gpu_device()
|
||||
|
||||
# 释放 GPU 设备
|
||||
if self.gpu_scheduler.enabled:
|
||||
self.gpu_scheduler.release(device_index)
|
||||
|
||||
# 停止租约续期
|
||||
lease_service.stop()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user