Files
FrameTour-RenderWorker/util/system.py
Jerry Yan 0cc96a968b feat(gpu): 添加多显卡调度支持
- 新增 GPUDevice 数据类定义 GPU 设备信息
- 扩展 WorkerConfig 添加 gpu_devices 配置项
- 从环境变量 GPU_DEVICES 读取多显卡设备配置
- 实现 GPUScheduler 提供轮询调度功能
- 修改 FFmpeg 参数生成支持设备指定
- 添加线程本地存储管理当前 GPU 设备
- 更新任务执行器集成 GPU 设备分配
- 实现 GPU 设备自动检测和验证功能
- 添加相关日志记录和状态监控
2026-01-19 18:34:03 +08:00

346 lines
8.6 KiB
Python

# -*- coding: utf-8 -*-
"""
系统信息工具
提供系统信息采集功能。
"""
import logging
import os
import platform
import subprocess
from typing import Optional, Dict, Any, List
import psutil
from constant import SOFTWARE_VERSION, DEFAULT_CAPABILITIES, HW_ACCEL_NONE, HW_ACCEL_QSV, HW_ACCEL_CUDA
from domain.gpu import GPUDevice
logger = logging.getLogger(__name__)
def get_sys_info():
    """
    Collect a snapshot of basic host information.

    Returns:
        dict: String-valued entries ``os``, ``cpu``, ``memory``, ``cpuUsage``,
        ``memoryAvailable``, ``platform``, ``pythonVersion`` and ``version``.
        A ``gpu`` entry is added only when ``get_gpu_info()`` returns a
        truthy value.
    """
    bytes_per_gib = 1024 ** 3
    vm = psutil.virtual_memory()
    system_name = platform.system()

    snapshot = {
        # 'os' and 'platform' both carry platform.system().
        'os': system_name,
        'cpu': f"{os.cpu_count()} cores",
        'memory': f"{vm.total // bytes_per_gib}GB",
        'cpuUsage': f"{psutil.cpu_percent()}%",
        'memoryAvailable': f"{vm.available // bytes_per_gib}GB",
        'platform': system_name,
        'pythonVersion': platform.python_version(),
        'version': SOFTWARE_VERSION,
    }

    # GPU details are optional; the key is omitted when nothing was detected.
    detected_gpu = get_gpu_info()
    if detected_gpu:
        snapshot['gpu'] = detected_gpu
    return snapshot
def get_capabilities():
    """
    Return the capabilities this worker advertises.

    Returns:
        A copy of ``DEFAULT_CAPABILITIES`` (made with its ``copy()`` method),
        so callers can modify the result without touching the module-level
        constant.
    """
    capabilities = DEFAULT_CAPABILITIES.copy()
    return capabilities
def get_gpu_info() -> Optional[str]:
    """
    Best-effort lookup of the GPU name via ``nvidia-smi``.

    Returns:
        str: The first line of ``nvidia-smi --query-gpu=name`` output when the
        command exits with status 0. ``None`` when the command cannot be
        run, raises (including the 5 second timeout), or exits non-zero.
    """
    query = ['nvidia-smi', '--query-gpu=name', '--format=csv,noheader']
    try:
        proc = subprocess.run(query, capture_output=True, text=True, timeout=5)
    except Exception:
        # Deliberately best-effort: any failure to run the tool means "no info".
        return None

    if proc.returncode != 0:
        return None

    # nvidia-smi prints one line per GPU; only the first one is reported.
    first_line, _, _ = proc.stdout.strip().partition('\n')
    return first_line
def get_ffmpeg_version() -> str:
    """
    Report the version string printed by ``ffmpeg -version``.

    Returns:
        str: The token that follows the word ``version`` on the first output
        line (e.g. ``6.0`` for ``ffmpeg version 6.0 ...``), or ``'unknown'``
        when FFmpeg cannot be run, exits non-zero, or prints no such token.
    """
    try:
        proc = subprocess.run(
            ['ffmpeg', '-version'],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except Exception:
        # Best-effort probe: a missing binary or a timeout is not an error here.
        return 'unknown'

    if proc.returncode != 0:
        return 'unknown'

    tokens = proc.stdout.split('\n')[0].split()
    # 'version' must be followed by another token, hence the [:-1] slice.
    if 'version' in tokens[:-1]:
        return tokens[tokens.index('version') + 1]
    return 'unknown'
def check_ffmpeg_encoder(encoder: str) -> bool:
    """
    Check whether FFmpeg lists the given encoder.

    The name is compared exactly against the name column of
    ``ffmpeg -hide_banner -encoders``. A plain substring test over the whole
    output would also accept prefixes of other encoders (``hevc`` vs
    ``hevc_nvenc``), words from the description column, and the empty string.

    Args:
        encoder: Encoder name, e.g. ``'h264_nvenc'`` or ``'h264_qsv'``.

    Returns:
        bool: ``True`` only when an encoder with exactly this name is listed.
        ``False`` when it is not listed, when ``encoder`` is empty, or when
        FFmpeg cannot be run, times out (5 seconds), or exits non-zero.
    """
    if not encoder:
        return False

    try:
        proc = subprocess.run(
            ['ffmpeg', '-hide_banner', '-encoders'],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except Exception:
        # Best-effort probe: a missing binary or a timeout means "not supported".
        return False

    if proc.returncode != 0:
        return False

    for row in proc.stdout.splitlines():
        fields = row.split()
        # Entry rows look like " V....D h264_nvenc  <description>".
        # Legend rows (" V..... = Video") have "=" in the name column.
        if len(fields) >= 2 and fields[1] != '=' and fields[1] == encoder:
            return True
    return False
def check_ffmpeg_decoder(decoder: str) -> bool:
    """
    Check whether FFmpeg lists the given decoder.

    The name is compared exactly against the name column of
    ``ffmpeg -hide_banner -decoders``. A plain substring test over the whole
    output would also accept prefixes of other decoders (``h264`` vs
    ``h264_cuvid``), words from the description column, and the empty string.

    Args:
        decoder: Decoder name, e.g. ``'h264_cuvid'`` or ``'h264_qsv'``.

    Returns:
        bool: ``True`` only when a decoder with exactly this name is listed.
        ``False`` when it is not listed, when ``decoder`` is empty, or when
        FFmpeg cannot be run, times out (5 seconds), or exits non-zero.
    """
    if not decoder:
        return False

    try:
        proc = subprocess.run(
            ['ffmpeg', '-hide_banner', '-decoders'],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except Exception:
        # Best-effort probe: a missing binary or a timeout means "not supported".
        return False

    if proc.returncode != 0:
        return False

    for row in proc.stdout.splitlines():
        fields = row.split()
        # Entry rows look like " V....D h264_cuvid  <description>".
        # Legend rows (" V..... = Video") have "=" in the name column.
        if len(fields) >= 2 and fields[1] != '=' and fields[1] == decoder:
            return True
    return False
def check_ffmpeg_hwaccel(hwaccel: str) -> bool:
    """
    Check whether FFmpeg lists the given hardware acceleration method.

    ``ffmpeg -hide_banner -hwaccels`` prints a header line followed by one
    method name per line. The name is compared against whole lines. A plain
    substring test would also accept fragments such as ``va`` (inside
    ``vaapi``), words from the header, and the empty string.

    Args:
        hwaccel: Method name, e.g. ``'cuda'``, ``'qsv'``, ``'dxva2'``,
            ``'d3d11va'``.

    Returns:
        bool: ``True`` only when a method with exactly this name is listed.
        ``False`` when it is not listed, when ``hwaccel`` is empty, or when
        FFmpeg cannot be run, times out (5 seconds), or exits non-zero.
    """
    if not hwaccel:
        return False

    try:
        proc = subprocess.run(
            ['ffmpeg', '-hide_banner', '-hwaccels'],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except Exception:
        # Best-effort probe: a missing binary or a timeout means "not supported".
        return False

    if proc.returncode != 0:
        return False

    for row in proc.stdout.splitlines():
        method = row.strip()
        # Skip blank lines and the "Hardware acceleration methods:" header.
        if not method or method.endswith(':'):
            continue
        if method == hwaccel:
            return True
    return False
def detect_hw_accel_support() -> Dict[str, Any]:
    """
    Probe which hardware acceleration paths are usable on this host.

    CUDA is probed only when ``get_gpu_info()`` reports a GPU; QSV is always
    probed.

    Returns:
        dict: Support report of the form
            {
                'cuda': {
                    'available': bool,   # FFmpeg lists the 'cuda' hwaccel
                    'gpu': str or None,  # name from get_gpu_info()
                    'encoder': bool,     # h264_nvenc
                    'decoder': bool,     # h264_cuvid
                },
                'qsv': {
                    'available': bool,   # FFmpeg lists the 'qsv' hwaccel
                    'encoder': bool,     # h264_qsv
                    'decoder': bool,     # h264_qsv
                },
                'recommended': ...       # HW_ACCEL_CUDA, HW_ACCEL_QSV or HW_ACCEL_NONE
            }
    """
    gpu_name = get_gpu_info()
    if gpu_name:
        cuda_report = {
            'available': check_ffmpeg_hwaccel('cuda'),
            'gpu': gpu_name,
            'encoder': check_ffmpeg_encoder('h264_nvenc'),
            'decoder': check_ffmpeg_decoder('h264_cuvid'),
        }
    else:
        # No GPU reported: leave every CUDA field at its default.
        cuda_report = {
            'available': False,
            'gpu': None,
            'encoder': False,
            'decoder': False,
        }

    qsv_report = {
        'available': check_ffmpeg_hwaccel('qsv'),
        'encoder': check_ffmpeg_encoder('h264_qsv'),
        'decoder': check_ffmpeg_decoder('h264_qsv'),
    }

    # Preference order: CUDA first, then QSV. Each needs hwaccel plus encoder.
    if cuda_report['available'] and cuda_report['encoder']:
        recommended = HW_ACCEL_CUDA
    elif qsv_report['available'] and qsv_report['encoder']:
        recommended = HW_ACCEL_QSV
    else:
        recommended = HW_ACCEL_NONE

    return {
        'cuda': cuda_report,
        'qsv': qsv_report,
        'recommended': recommended,
    }
def get_hw_accel_info_str() -> str:
    """
    Render the result of ``detect_hw_accel_support()`` as one readable line.

    Returns:
        str: Comma-separated entries such as ``CUDA(<gpu>, encoder+decoder)``
        and ``QSV(encoder only)`` followed by ``[recommended: <value>]``, or
        ``"No hardware acceleration available"`` when neither CUDA nor QSV is
        reported as available.
    """

    def _codec_status(entry) -> str:
        # Summarise which of encoder/decoder the acceleration entry supports.
        has_encoder = entry['encoder']
        has_decoder = entry['decoder']
        if has_encoder and has_decoder:
            return 'encoder+decoder'
        if has_encoder:
            return 'encoder only'
        if has_decoder:
            return 'decoder only'
        return 'hwaccel only'

    report = detect_hw_accel_support()
    cuda_entry = report['cuda']
    qsv_entry = report['qsv']

    summaries = []
    if cuda_entry['available']:
        gpu_label = cuda_entry['gpu'] or 'Unknown GPU'
        summaries.append(f"CUDA({gpu_label}, {_codec_status(cuda_entry)})")
    if qsv_entry['available']:
        summaries.append(f"QSV({_codec_status(qsv_entry)})")

    if summaries:
        return ', '.join(summaries) + f" [recommended: {report['recommended']}]"
    return "No hardware acceleration available"
def get_all_gpu_info() -> List[GPUDevice]:
    """
    List all NVIDIA GPUs reported by ``nvidia-smi``.

    Runs ``nvidia-smi --query-gpu=index,name,memory.total`` in CSV mode and
    turns each output row into a ``GPUDevice`` with ``available=True``.

    Rows are parsed independently. A row whose index is not an integer is
    skipped with a warning, and a memory field that is not an integer (for
    example a placeholder such as ``[N/A]``) yields ``memory_total=None``.
    One odd row therefore no longer discards every detected device.

    Returns:
        List of GPU devices. Empty when ``nvidia-smi`` cannot be run, times
        out (10 seconds), exits non-zero, or reports nothing.
    """
    try:
        proc = subprocess.run(
            [
                'nvidia-smi',
                '--query-gpu=index,name,memory.total',
                '--format=csv,noheader,nounits'
            ],
            capture_output=True,
            text=True,
            timeout=10
        )
        if proc.returncode != 0:
            return []

        devices = []
        for line in proc.stdout.splitlines():
            fields = [field.strip() for field in line.split(',')]
            if len(fields) < 2:
                # Blank or truncated row.
                continue

            try:
                index = int(fields[0])
            except ValueError:
                logger.warning("Skipping unparsable nvidia-smi row: %r", line)
                continue

            if len(fields) >= 3:
                # Index is the first column and memory the last, so any extra
                # commas belong to the device name.
                name = ', '.join(fields[1:-1])
                try:
                    memory = int(fields[-1])
                except ValueError:
                    memory = None
            else:
                name = fields[1]
                memory = None

            devices.append(GPUDevice(
                index=index,
                name=name,
                memory_total=memory,
                available=True
            ))
        return devices
    except Exception as e:
        # Detection is best-effort; callers treat an empty list as "no GPUs".
        logger.warning("Failed to detect GPUs: %s", e)
        return []
def validate_gpu_device(index: int) -> bool:
    """
    Check that ``nvidia-smi`` can report a GPU at the given index.

    Args:
        index: GPU device index passed to ``nvidia-smi -i``.

    Returns:
        ``True`` when the query exits with status 0 and prints a non-blank
        name. ``False`` otherwise, including when ``nvidia-smi`` cannot be run
        or the 5 second timeout expires.
    """
    try:
        command = [
            'nvidia-smi',
            '-i', str(index),
            '--query-gpu=name',
            '--format=csv,noheader',
        ]
        proc = subprocess.run(command, capture_output=True, text=True, timeout=5)
    except Exception:
        # Any failure to run the query counts as "device not usable".
        return False

    if proc.returncode != 0:
        return False
    return bool(proc.stdout.strip())