feat(video): 添加硬件加速支持

- 定义硬件加速类型常量(none、qsv、cuda)
- 配置QSV和CUDA编码参数及预设
- 在WorkerConfig中添加硬件加速配置选项
- 实现基于硬件加速类型的编码参数动态获取
- 添加FFmpeg硬件加速解码和滤镜参数
- 检测并报告系统硬件加速支持信息
- 在API客户端中上报硬件加速配置和支持状态
This commit is contained in:
2026-01-13 13:34:27 +08:00
parent a26c44a3cd
commit 71bd2e59f9
7 changed files with 364 additions and 22 deletions

View File

@@ -41,7 +41,14 @@ EFFECT_TYPES = (
'blur', # 模糊效果(预留) 'blur', # 模糊效果(预留)
) )
# 统一视频编码参数(来自集成文档) # 硬件加速类型
# Hardware acceleration type identifiers.
HW_ACCEL_NONE = 'none' # Pure software encoding/decoding
HW_ACCEL_QSV = 'qsv' # Intel Quick Sync Video (integrated or discrete GPU)
HW_ACCEL_CUDA = 'cuda' # NVIDIA NVENC/NVDEC
# Tuple of all identifiers defined above.
HW_ACCEL_TYPES = (HW_ACCEL_NONE, HW_ACCEL_QSV, HW_ACCEL_CUDA)
# 统一视频编码参数(软件编码,来自集成文档)
VIDEO_ENCODE_PARAMS = { VIDEO_ENCODE_PARAMS = {
'codec': 'libx264', 'codec': 'libx264',
'preset': 'medium', 'preset': 'medium',
@@ -51,6 +58,28 @@ VIDEO_ENCODE_PARAMS = {
'pix_fmt': 'yuv420p', 'pix_fmt': 'yuv420p',
} }
# QSV hardware-accelerated video encoding parameters (Intel Quick Sync)
VIDEO_ENCODE_PARAMS_QSV = {
'codec': 'h264_qsv',
'preset': 'medium', # QSV supports: veryfast, faster, fast, medium, slow, slower, veryslow
'profile': 'main',
'level': '4.0',
'global_quality': '23', # QSV uses global_quality instead of crf (1-51, lower value = higher quality)
'look_ahead': '1', # Enable look-ahead analysis to improve quality
'pix_fmt': 'nv12', # QSV hardware surface format
}
# CUDA hardware-accelerated video encoding parameters (NVIDIA NVENC)
VIDEO_ENCODE_PARAMS_CUDA = {
'codec': 'h264_nvenc',
'preset': 'p4', # NVENC presets p1-p7 (p1 fastest, p7 slowest / highest quality); p4 is roughly "medium"
'profile': 'main',
'level': '4.0',
'rc': 'vbr', # Rate-control mode: vbr = variable bitrate
'cq': '23', # Quality value for constant-quality mode (0-51)
'pix_fmt': 'yuv420p', # NVENC input format (converted automatically)
}
# 统一音频编码参数 # 统一音频编码参数
AUDIO_ENCODE_PARAMS = { AUDIO_ENCODE_PARAMS = {
'codec': 'aac', 'codec': 'aac',

View File

@@ -9,6 +9,8 @@ import os
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import List, Optional from typing import List, Optional
from constant import HW_ACCEL_NONE, HW_ACCEL_QSV, HW_ACCEL_CUDA, HW_ACCEL_TYPES
# 默认支持的任务类型 # 默认支持的任务类型
DEFAULT_CAPABILITIES = [ DEFAULT_CAPABILITIES = [
@@ -54,6 +56,9 @@ class WorkerConfig:
download_timeout: int = 300 # 秒,下载超时 download_timeout: int = 300 # 秒,下载超时
upload_timeout: int = 600 # 秒,上传超时 upload_timeout: int = 600 # 秒,上传超时
# 硬件加速配置
hw_accel: str = HW_ACCEL_NONE # 硬件加速类型: none, qsv, cuda
@classmethod @classmethod
def from_env(cls) -> 'WorkerConfig': def from_env(cls) -> 'WorkerConfig':
"""从环境变量创建配置""" """从环境变量创建配置"""
@@ -98,6 +103,11 @@ class WorkerConfig:
download_timeout = int(os.getenv('DOWNLOAD_TIMEOUT', '300')) download_timeout = int(os.getenv('DOWNLOAD_TIMEOUT', '300'))
upload_timeout = int(os.getenv('UPLOAD_TIMEOUT', '600')) upload_timeout = int(os.getenv('UPLOAD_TIMEOUT', '600'))
# 硬件加速配置
hw_accel = os.getenv('HW_ACCEL', HW_ACCEL_NONE).lower()
if hw_accel not in HW_ACCEL_TYPES:
hw_accel = HW_ACCEL_NONE
return cls( return cls(
api_endpoint=api_endpoint, api_endpoint=api_endpoint,
access_key=access_key, access_key=access_key,
@@ -110,7 +120,8 @@ class WorkerConfig:
capabilities=capabilities, capabilities=capabilities,
ffmpeg_timeout=ffmpeg_timeout, ffmpeg_timeout=ffmpeg_timeout,
download_timeout=download_timeout, download_timeout=download_timeout,
upload_timeout=upload_timeout upload_timeout=upload_timeout,
hw_accel=hw_accel
) )
def get_work_dir_path(self, task_id: str) -> str: def get_work_dir_path(self, task_id: str) -> str:
@@ -120,3 +131,15 @@ class WorkerConfig:
def ensure_temp_dir(self) -> None: def ensure_temp_dir(self) -> None:
"""确保临时目录存在""" """确保临时目录存在"""
os.makedirs(self.temp_dir, exist_ok=True) os.makedirs(self.temp_dir, exist_ok=True)
def is_hw_accel_enabled(self) -> bool:
    """Return True when a hardware acceleration backend is configured."""
    software_only = self.hw_accel == HW_ACCEL_NONE
    return not software_only
def is_qsv(self) -> bool:
    """Return True when the configured backend is Intel QSV."""
    backend = self.hw_accel
    return backend == HW_ACCEL_QSV
def is_cuda(self) -> bool:
    """Return True when the configured backend is NVIDIA CUDA."""
    backend = self.hw_accel
    return backend == HW_ACCEL_CUDA

View File

@@ -19,6 +19,10 @@ from domain.task import Task
from domain.result import TaskResult, ErrorCode from domain.result import TaskResult, ErrorCode
from domain.config import WorkerConfig from domain.config import WorkerConfig
from services import storage from services import storage
from constant import (
HW_ACCEL_NONE, HW_ACCEL_QSV, HW_ACCEL_CUDA,
VIDEO_ENCODE_PARAMS, VIDEO_ENCODE_PARAMS_QSV, VIDEO_ENCODE_PARAMS_CUDA
)
if TYPE_CHECKING: if TYPE_CHECKING:
from services.api_client import APIClientV2 from services.api_client import APIClientV2
@@ -26,15 +30,94 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def get_video_encode_args(hw_accel: str = HW_ACCEL_NONE) -> List[str]:
    """
    Build the FFmpeg video encoder arguments for a hardware acceleration mode.

    Every mode emits codec, preset, profile and level, followed by the
    mode-specific quality controls and finally an explicit ``-pix_fmt`` taken
    from the matching parameter table. Pinning the pixel format for the
    hardware encoders as well keeps the encoder input consistent with the
    requested ``main`` profile instead of depending on whatever format the
    filter graph happens to produce.

    Args:
        hw_accel: Hardware acceleration type (none, qsv, cuda). Any other
            value falls back to the software encoder.

    Returns:
        List of FFmpeg video encoding arguments.
    """
    if hw_accel == HW_ACCEL_QSV:
        params = VIDEO_ENCODE_PARAMS_QSV
        quality_args = [
            '-global_quality', params['global_quality'],
            '-look_ahead', params['look_ahead'],
        ]
    elif hw_accel == HW_ACCEL_CUDA:
        params = VIDEO_ENCODE_PARAMS_CUDA
        quality_args = [
            '-rc', params['rc'],
            '-cq', params['cq'],
            '-b:v', '0',  # used together with vbr mode so that cq drives quality
        ]
    else:
        # Software encoding (default)
        params = VIDEO_ENCODE_PARAMS
        quality_args = ['-crf', params['crf']]

    return [
        '-c:v', params['codec'],
        '-preset', params['preset'],
        '-profile:v', params['profile'],
        '-level', params['level'],
        *quality_args,
        '-pix_fmt', params['pix_fmt'],
    ]
def get_hwaccel_decode_args(hw_accel: str = HW_ACCEL_NONE) -> List[str]:
    """
    Return the hardware decoding arguments that go before the input file.

    For CUDA and QSV both ``-hwaccel`` and ``-hwaccel_output_format`` are set
    to the backend name.
    NOTE(review): with ``-hwaccel_output_format`` set, decoded frames are
    presumably kept as hardware surfaces rather than system memory, so CPU
    filters need a ``hwdownload`` step first -- confirm against FFmpeg docs.

    Args:
        hw_accel: Hardware acceleration type (none, qsv, cuda).

    Returns:
        List of FFmpeg hardware decoding arguments; empty for software mode
        or any unrecognised value.
    """
    if hw_accel == HW_ACCEL_QSV:
        device = 'qsv'
    elif hw_accel == HW_ACCEL_CUDA:
        device = 'cuda'
    else:
        return []
    return ['-hwaccel', device, '-hwaccel_output_format', device]
def get_hwaccel_filter_prefix(hw_accel: str = HW_ACCEL_NONE) -> str:
    """
    Return the filter-chain prefix that downloads frames from GPU to CPU.

    Most complex filters (lut3d, overlay, crop, ...) do not accept hardware
    surfaces, so the surface has to be downloaded to system memory at the
    start of the filter chain.

    Args:
        hw_accel: Hardware acceleration type.

    Returns:
        The hwdownload filter string (with trailing comma) to put at the
        start of the filter chain, or an empty string for software mode.
    """
    uses_gpu_surfaces = hw_accel == HW_ACCEL_CUDA or hw_accel == HW_ACCEL_QSV
    if not uses_gpu_surfaces:
        return ''
    # CUDA and QSV share the same download step.
    return 'hwdownload,format=nv12,'
# v2 unified video encoding arguments (kept for compatibility with older code; uses software encoding)
VIDEO_ENCODE_ARGS = get_video_encode_args(HW_ACCEL_NONE)
# v2 统一音频编码参数 # v2 统一音频编码参数
AUDIO_ENCODE_ARGS = [ AUDIO_ENCODE_ARGS = [
@@ -178,6 +261,33 @@ class BaseHandler(TaskHandler, ABC):
self.config = config self.config = config
self.api_client = api_client self.api_client = api_client
def get_video_encode_args(self) -> List[str]:
    """
    Return the video encoder arguments for this handler's configuration.

    Returns:
        List of FFmpeg video encoding arguments.
    """
    configured_accel = self.config.hw_accel
    return get_video_encode_args(configured_accel)
def get_hwaccel_decode_args(self) -> List[str]:
    """
    Return the hardware decoding arguments (placed before the input file).

    Returns:
        List of FFmpeg hardware decoding arguments.
    """
    configured_accel = self.config.hw_accel
    return get_hwaccel_decode_args(configured_accel)
def get_hwaccel_filter_prefix(self) -> str:
    """
    Return the hardware acceleration filter prefix for this handler.

    Returns:
        The hwdownload filter string to put at the start of the filter chain.
    """
    configured_accel = self.config.hw_accel
    return get_hwaccel_filter_prefix(configured_accel)
def before_handle(self, task: Task) -> None: def before_handle(self, task: Task) -> None:
"""处理前钩子""" """处理前钩子"""
logger.debug(f"[task:{task.task_id}] Before handle: {task.task_type.value}") logger.debug(f"[task:{task.task_id}] Before handle: {task.task_type.value}")

View File

@@ -10,7 +10,7 @@ import os
import logging import logging
from typing import List, Optional from typing import List, Optional
from handlers.base import BaseHandler, VIDEO_ENCODE_ARGS from handlers.base import BaseHandler
from domain.task import Task, TaskType, TransitionConfig, TRANSITION_TYPES from domain.task import Task, TaskType, TransitionConfig, TRANSITION_TYPES
from domain.result import TaskResult, ErrorCode from domain.result import TaskResult, ErrorCode
@@ -235,8 +235,8 @@ class ComposeTransitionHandler(BaseHandler):
'-map', '[outv]', '-map', '[outv]',
] ]
# 编码参数(与片段视频一致 # 编码参数(根据硬件加速配置动态获取
cmd.extend(VIDEO_ENCODE_ARGS) cmd.extend(self.get_video_encode_args())
# 帧率 # 帧率
fps = output_spec.fps fps = output_spec.fps

View File

@@ -10,7 +10,7 @@ import os
import logging import logging
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
from handlers.base import BaseHandler, VIDEO_ENCODE_ARGS from handlers.base import BaseHandler
from domain.task import Task, TaskType, RenderSpec, OutputSpec, Effect from domain.task import Task, TaskType, RenderSpec, OutputSpec, Effect
from domain.result import TaskResult, ErrorCode from domain.result import TaskResult, ErrorCode
@@ -170,6 +170,11 @@ class RenderSegmentVideoHandler(BaseHandler):
""" """
cmd = ['ffmpeg', '-y', '-hide_banner'] cmd = ['ffmpeg', '-y', '-hide_banner']
# 硬件加速解码参数(在输入文件之前)
hwaccel_args = self.get_hwaccel_decode_args()
if hwaccel_args:
cmd.extend(hwaccel_args)
# 输入文件 # 输入文件
cmd.extend(['-i', input_file]) cmd.extend(['-i', input_file])
@@ -196,8 +201,8 @@ class RenderSegmentVideoHandler(BaseHandler):
elif filters: elif filters:
cmd.extend(['-vf', filters]) cmd.extend(['-vf', filters])
# 编码参数(v2 统一参数 # 编码参数(根据硬件加速配置动态获取
cmd.extend(VIDEO_ENCODE_ARGS) cmd.extend(self.get_video_encode_args())
# 帧率 # 帧率
fps = output_spec.fps fps = output_spec.fps
@@ -253,6 +258,12 @@ class RenderSegmentVideoHandler(BaseHandler):
effects = render_spec.get_effects() effects = render_spec.get_effects()
has_camera_shot = any(e.effect_type == 'cameraShot' for e in effects) has_camera_shot = any(e.effect_type == 'cameraShot' for e in effects)
# 硬件加速时需要先 hwdownload(将 GPU 表面下载到系统内存)
hwaccel_prefix = self.get_hwaccel_filter_prefix()
if hwaccel_prefix:
# 去掉末尾的逗号,作为第一个滤镜
filters.append(hwaccel_prefix.rstrip(','))
# 1. 变速处理 # 1. 变速处理
speed = float(render_spec.speed) if render_spec.speed else 1.0 speed = float(render_spec.speed) if render_spec.speed else 1.0
if speed != 1.0 and speed > 0: if speed != 1.0 and speed > 0:
@@ -304,7 +315,8 @@ class RenderSegmentVideoHandler(BaseHandler):
fps=fps, fps=fps,
has_overlay=has_overlay, has_overlay=has_overlay,
overlap_head_ms=overlap_head_ms, overlap_head_ms=overlap_head_ms,
overlap_tail_ms=overlap_tail_ms overlap_tail_ms=overlap_tail_ms,
use_hwdownload=bool(hwaccel_prefix)
) )
# 6. 帧冻结(tpad)- 用于转场 overlap 区域 # 6. 帧冻结(tpad)- 用于转场 overlap 区域
@@ -337,7 +349,8 @@ class RenderSegmentVideoHandler(BaseHandler):
fps: int, fps: int,
has_overlay: bool = False, has_overlay: bool = False,
overlap_head_ms: int = 0, overlap_head_ms: int = 0,
overlap_tail_ms: int = 0 overlap_tail_ms: int = 0,
use_hwdownload: bool = False
) -> str: ) -> str:
""" """
构建包含特效的 filter_complex 滤镜图 构建包含特效的 filter_complex 滤镜图
@@ -351,6 +364,7 @@ class RenderSegmentVideoHandler(BaseHandler):
has_overlay: 是否有叠加层 has_overlay: 是否有叠加层
overlap_head_ms: 头部 overlap 时长 overlap_head_ms: 头部 overlap 时长
overlap_tail_ms: 尾部 overlap 时长 overlap_tail_ms: 尾部 overlap 时长
use_hwdownload: 是否使用了硬件加速解码(已在 base_filters 中包含 hwdownload)
Returns: Returns:
filter_complex 格式的滤镜字符串 filter_complex 格式的滤镜字符串

View File

@@ -12,6 +12,7 @@ from typing import Dict, List, Optional, Any
from domain.task import Task from domain.task import Task
from domain.config import WorkerConfig from domain.config import WorkerConfig
from util.system import get_hw_accel_info_str
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -338,7 +339,9 @@ class APIClientV2:
'cpu': f"{psutil.cpu_count()} cores", 'cpu': f"{psutil.cpu_count()} cores",
'memory': f"{psutil.virtual_memory().total // (1024**3)}GB", 'memory': f"{psutil.virtual_memory().total // (1024**3)}GB",
'cpuUsage': f"{psutil.cpu_percent()}%", 'cpuUsage': f"{psutil.cpu_percent()}%",
'memoryAvailable': f"{psutil.virtual_memory().available // (1024**3)}GB" 'memoryAvailable': f"{psutil.virtual_memory().available // (1024**3)}GB",
'hwAccelConfig': self.config.hw_accel, # 当前配置的硬件加速
'hwAccelSupport': get_hw_accel_info_str(), # 系统支持的硬件加速
} }
# 尝试获取 GPU 信息 # 尝试获取 GPU 信息

View File

@@ -8,10 +8,10 @@
import os import os
import platform import platform
import subprocess import subprocess
from typing import Optional from typing import Optional, Dict, Any
import psutil import psutil
from constant import SOFTWARE_VERSION, DEFAULT_CAPABILITIES from constant import SOFTWARE_VERSION, DEFAULT_CAPABILITIES, HW_ACCEL_NONE, HW_ACCEL_QSV, HW_ACCEL_CUDA
def get_sys_info(): def get_sys_info():
@@ -101,3 +101,166 @@ def get_ffmpeg_version() -> str:
pass pass
return 'unknown' return 'unknown'
def _ffmpeg_listing(flag: str) -> Optional[str]:
    """Run ``ffmpeg -hide_banner <flag>`` and return its stdout, or None on failure."""
    try:
        result = subprocess.run(
            ['ffmpeg', '-hide_banner', flag],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except (OSError, subprocess.SubprocessError, ValueError):
        # Best-effort probe: a missing binary, a timeout or undecodable
        # output all mean "support cannot be confirmed", not a crash.
        return None
    if result.returncode != 0:
        return None
    return result.stdout


def _codec_listed(listing: Optional[str], name: str) -> bool:
    """Return True when *name* is the codec-name column of a listing row."""
    if not listing or not name:
        return False
    # Rows look like " V....D h264_nvenc   NVIDIA NVENC ...": the name is the
    # second whitespace-separated column. Comparing that column exactly
    # avoids matching prefixes (libx264 vs libx264rgb) or description text.
    rows = (line.split() for line in listing.splitlines())
    return any(len(columns) >= 2 and columns[1] == name for columns in rows)


def check_ffmpeg_encoder(encoder: str) -> bool:
    """
    Check whether FFmpeg supports the given encoder.

    Args:
        encoder: Encoder name, e.g. 'h264_nvenc', 'h264_qsv'.

    Returns:
        bool: True only when ``ffmpeg -encoders`` lists exactly this name;
        False when it does not, when the name is empty, or when ffmpeg
        cannot be run.
    """
    return _codec_listed(_ffmpeg_listing('-encoders'), encoder)


def check_ffmpeg_decoder(decoder: str) -> bool:
    """
    Check whether FFmpeg supports the given decoder.

    Args:
        decoder: Decoder name, e.g. 'h264_cuvid', 'h264_qsv'.

    Returns:
        bool: True only when ``ffmpeg -decoders`` lists exactly this name;
        False when it does not, when the name is empty, or when ffmpeg
        cannot be run.
    """
    return _codec_listed(_ffmpeg_listing('-decoders'), decoder)


def check_ffmpeg_hwaccel(hwaccel: str) -> bool:
    """
    Check whether FFmpeg supports the given hardware acceleration method.

    Args:
        hwaccel: Hardware acceleration method, e.g. 'cuda', 'qsv', 'dxva2',
            'd3d11va'.

    Returns:
        bool: True only when ``ffmpeg -hwaccels`` prints this method on a
        line of its own; False otherwise or when ffmpeg cannot be run.
    """
    listing = _ffmpeg_listing('-hwaccels')
    if not listing or not hwaccel:
        return False
    # One method per line; whole-line comparison avoids substring hits such
    # as 'd3d11' matching 'd3d11va' or words from the header line.
    return any(line.strip() == hwaccel for line in listing.splitlines())
def detect_hw_accel_support() -> Dict[str, Any]:
    """
    Probe which hardware acceleration backends are usable on this system.

    Returns:
        dict: Hardware acceleration support report
        {
            'cuda': {
                'available': bool,
                'gpu': value returned by get_gpu_info(), or None,
                'encoder': bool,  # h264_nvenc
                'decoder': bool,  # h264_cuvid
            },
            'qsv': {
                'available': bool,
                'encoder': bool,  # h264_qsv
                'decoder': bool,  # h264_qsv
            },
            'recommended': str  # recommended backend: 'cuda', 'qsv', 'none'
        }
    """
    cuda_report = {
        'available': False,
        'gpu': None,
        'encoder': False,
        'decoder': False,
    }

    # CUDA/NVENC is only probed when a GPU was reported at all.
    detected_gpu = get_gpu_info()
    if detected_gpu:
        cuda_report['gpu'] = detected_gpu
        cuda_report['available'] = check_ffmpeg_hwaccel('cuda')
        cuda_report['encoder'] = check_ffmpeg_encoder('h264_nvenc')
        cuda_report['decoder'] = check_ffmpeg_decoder('h264_cuvid')

    # QSV is always probed through FFmpeg.
    qsv_report = {
        'available': check_ffmpeg_hwaccel('qsv'),
        'encoder': check_ffmpeg_encoder('h264_qsv'),
        'decoder': check_ffmpeg_decoder('h264_qsv'),
    }

    # Recommendation: CUDA first, then QSV, otherwise software.
    if cuda_report['available'] and cuda_report['encoder']:
        recommended = HW_ACCEL_CUDA
    elif qsv_report['available'] and qsv_report['encoder']:
        recommended = HW_ACCEL_QSV
    else:
        recommended = HW_ACCEL_NONE

    return {
        'cuda': cuda_report,
        'qsv': qsv_report,
        'recommended': recommended,
    }
def get_hw_accel_info_str() -> str:
    """
    Render the hardware acceleration support report as a readable string.

    Returns:
        str: One entry per available backend followed by the recommended
        backend, or a fixed message when no backend is available.
    """
    def _capability_label(entry) -> str:
        # Summarise which of encoder/decoder the backend offers.
        has_encoder = entry['encoder']
        has_decoder = entry['decoder']
        if has_encoder and has_decoder:
            return 'encoder+decoder'
        if has_encoder:
            return 'encoder only'
        if has_decoder:
            return 'decoder only'
        return 'hwaccel only'

    report = detect_hw_accel_support()
    cuda_entry = report['cuda']
    qsv_entry = report['qsv']
    entries = []

    if cuda_entry['available']:
        gpu_name = cuda_entry['gpu'] or 'Unknown GPU'
        entries.append(f"CUDA({gpu_name}, {_capability_label(cuda_entry)})")

    if qsv_entry['available']:
        entries.append(f"QSV({_capability_label(qsv_entry)})")

    if entries:
        return ', '.join(entries) + f" [recommended: {report['recommended']}]"
    return "No hardware acceleration available"