You've already forked FrameTour-RenderWorker
feat(gpu): 添加多显卡调度支持
- 新增 GPUDevice 数据类定义 GPU 设备信息
- 扩展 WorkerConfig 添加 gpu_devices 配置项
- 从环境变量 GPU_DEVICES 读取多显卡设备配置
- 实现 GPUScheduler 提供轮询调度功能
- 修改 FFmpeg 参数生成支持设备指定
- 添加线程本地存储管理当前 GPU 设备
- 更新任务执行器集成 GPU 设备分配
- 实现 GPU 设备自动检测和验证功能
- 添加相关日志记录和状态监控
This commit is contained in:
@@ -5,13 +5,17 @@
|
||||
提供系统信息采集功能。
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from typing import Optional, Dict, Any
|
||||
from typing import Optional, Dict, Any, List
|
||||
|
||||
import psutil
|
||||
from constant import SOFTWARE_VERSION, DEFAULT_CAPABILITIES, HW_ACCEL_NONE, HW_ACCEL_QSV, HW_ACCEL_CUDA
|
||||
from domain.gpu import GPUDevice
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_sys_info():
|
||||
@@ -264,3 +268,78 @@ def get_hw_accel_info_str() -> str:
|
||||
return "No hardware acceleration available"
|
||||
|
||||
return ', '.join(parts) + f" [recommended: {support['recommended']}]"
|
||||
|
||||
|
||||
def get_all_gpu_info() -> List[GPUDevice]:
    """
    Collect information about every NVIDIA GPU visible to nvidia-smi.

    Runs ``nvidia-smi --query-gpu=index,name,memory.total`` in headerless,
    unit-less CSV mode and turns each output row into a GPUDevice.

    Rows are parsed independently: a row whose index is not an integer is
    skipped (with a warning), and a memory field that is not an integer
    (nvidia-smi prints bracketed placeholders such as "[N/A]" for values it
    cannot report) is recorded as ``None``. One odd row therefore no longer
    discards the devices that parsed correctly.

    Returns:
        List of GPU devices; an empty list if nvidia-smi cannot be run,
        times out, exits with a non-zero status, or reports no devices.
    """
    try:
        result = subprocess.run(
            [
                'nvidia-smi',
                '--query-gpu=index,name,memory.total',
                '--format=csv,noheader,nounits'
            ],
            capture_output=True,
            text=True,
            timeout=10
        )

        if result.returncode != 0:
            return []

        devices = []
        for raw_line in result.stdout.splitlines():
            line = raw_line.strip()
            if not line:
                continue

            parts = [p.strip() for p in line.split(',')]
            if len(parts) < 2:
                # Not enough fields to identify a device.
                continue

            try:
                index = int(parts[0])
            except ValueError:
                # Without a usable index the device cannot be addressed,
                # so drop just this row instead of the whole result.
                logger.warning(f"Skipping unparsable nvidia-smi line: {line!r}")
                continue

            memory = None
            if len(parts) >= 3:
                try:
                    memory = int(parts[2])
                except ValueError:
                    # Placeholder such as "[N/A]": memory size unknown.
                    memory = None

            devices.append(GPUDevice(
                index=index,
                name=parts[1],
                memory_total=memory,
                available=True
            ))

        return devices

    except Exception as e:
        # Best-effort detection: a missing binary, a timeout, or any other
        # failure is reported as "no GPUs" rather than raised.
        logger.warning(f"Failed to detect GPUs: {e}")
        return []
|
||||
|
||||
|
||||
def validate_gpu_device(index: int) -> bool:
    """
    Check whether the GPU at the given index is usable.

    Asks nvidia-smi for the name of the device selected with ``-i <index>``.

    Args:
        index: GPU device index

    Returns:
        True when nvidia-smi exits with status 0 and prints a non-empty
        name; False otherwise, including when nvidia-smi cannot be run,
        times out, or raises any other exception.
    """
    try:
        command = [
            'nvidia-smi',
            '-i', str(index),
            '--query-gpu=name',
            '--format=csv,noheader',
        ]
        proc = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=5,
        )
        if proc.returncode != 0:
            return False
        reported_name = proc.stdout.strip()
        return bool(reported_name)
    except Exception:
        # Best-effort probe: any failure means "not available".
        return False
|
||||
|
||||
|
||||
Reference in New Issue
Block a user