You've already forked FrameTour-RenderWorker
Compare commits
20 Commits
e5c5a181d3
...
next
| Author | SHA1 | Date | |
|---|---|---|---|
| dd2d40c55b | |||
| c57524f174 | |||
| eeb21cada3 | |||
| a70573395b | |||
| ffb9d5390e | |||
| 6126856361 | |||
| a6263398ed | |||
| 885b69233a | |||
| 9158854411 | |||
| 634dc6c855 | |||
| ca9093504f | |||
| ceba9a17a4 | |||
| 7acae2f708 | |||
| ed8dca543e | |||
| 0a7a0dac89 | |||
| 797507d24b | |||
| f7ca07b9db | |||
| 4d5e57f61b | |||
| b291f33486 | |||
| 0cc96a968b |
10
.env.example
10
.env.example
@@ -32,11 +32,17 @@ TEMP_DIR=tmp/
|
||||
#UPLOAD_TIMEOUT=600 # 上传超时(秒)
|
||||
|
||||
# ===================
|
||||
# 硬件加速
|
||||
# 硬件加速与多显卡
|
||||
# ===================
|
||||
# 可选值: none, qsv, cuda
|
||||
# 硬件加速类型: none, qsv, cuda
|
||||
HW_ACCEL=none
|
||||
|
||||
# GPU 设备列表(逗号分隔的设备索引)
|
||||
# 不配置时:自动检测所有设备
|
||||
# 单设备示例:GPU_DEVICES=0
|
||||
# 多设备示例:GPU_DEVICES=0,1,2
|
||||
#GPU_DEVICES=0,1
|
||||
|
||||
# ===================
|
||||
# 素材缓存
|
||||
# ===================
|
||||
|
||||
@@ -5,12 +5,15 @@ Worker 配置模型
|
||||
定义 Worker 运行时的配置参数。
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional
|
||||
|
||||
from constant import HW_ACCEL_NONE, HW_ACCEL_QSV, HW_ACCEL_CUDA, HW_ACCEL_TYPES
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# 默认支持的任务类型
|
||||
DEFAULT_CAPABILITIES = [
|
||||
@@ -59,6 +62,9 @@ class WorkerConfig:
|
||||
# 硬件加速配置
|
||||
hw_accel: str = HW_ACCEL_NONE # 硬件加速类型: none, qsv, cuda
|
||||
|
||||
# GPU 设备配置(多显卡调度)
|
||||
gpu_devices: List[int] = field(default_factory=list) # 空列表表示使用默认设备
|
||||
|
||||
# 素材缓存配置
|
||||
cache_enabled: bool = True # 是否启用素材缓存
|
||||
cache_dir: str = "" # 缓存目录,默认为 temp_dir/cache
|
||||
@@ -113,6 +119,16 @@ class WorkerConfig:
|
||||
if hw_accel not in HW_ACCEL_TYPES:
|
||||
hw_accel = HW_ACCEL_NONE
|
||||
|
||||
# GPU 设备列表(用于多显卡调度)
|
||||
gpu_devices_str = os.getenv('GPU_DEVICES', '')
|
||||
gpu_devices: List[int] = []
|
||||
if gpu_devices_str:
|
||||
try:
|
||||
gpu_devices = [int(d.strip()) for d in gpu_devices_str.split(',') if d.strip()]
|
||||
except ValueError:
|
||||
logger.warning(f"Invalid GPU_DEVICES value: {gpu_devices_str}, using auto-detect")
|
||||
gpu_devices = []
|
||||
|
||||
# 素材缓存配置
|
||||
cache_enabled = os.getenv('CACHE_ENABLED', 'true').lower() in ('true', '1', 'yes')
|
||||
cache_dir = os.getenv('CACHE_DIR', '') # 空字符串表示使用默认路径
|
||||
@@ -132,6 +148,7 @@ class WorkerConfig:
|
||||
download_timeout=download_timeout,
|
||||
upload_timeout=upload_timeout,
|
||||
hw_accel=hw_accel,
|
||||
gpu_devices=gpu_devices,
|
||||
cache_enabled=cache_enabled,
|
||||
cache_dir=cache_dir if cache_dir else os.path.join(temp_dir, 'cache'),
|
||||
cache_max_size_gb=cache_max_size_gb
|
||||
@@ -156,3 +173,11 @@ class WorkerConfig:
|
||||
def is_cuda(self) -> bool:
|
||||
"""是否使用 CUDA 硬件加速"""
|
||||
return self.hw_accel == HW_ACCEL_CUDA
|
||||
|
||||
def has_multi_gpu(self) -> bool:
|
||||
"""是否配置了多 GPU"""
|
||||
return len(self.gpu_devices) > 1
|
||||
|
||||
def get_gpu_devices(self) -> List[int]:
|
||||
"""获取 GPU 设备列表"""
|
||||
return self.gpu_devices.copy()
|
||||
|
||||
31
domain/gpu.py
Normal file
31
domain/gpu.py
Normal file
@@ -0,0 +1,31 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
GPU 设备模型
|
||||
|
||||
定义 GPU 设备的数据结构。
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
class GPUDevice:
|
||||
"""
|
||||
GPU 设备信息
|
||||
|
||||
Attributes:
|
||||
index: 设备索引(对应 nvidia-smi 中的 GPU ID)
|
||||
name: 设备名称(如 "NVIDIA GeForce RTX 3090")
|
||||
memory_total: 显存总量(MB),可选
|
||||
available: 设备是否可用
|
||||
"""
|
||||
index: int
|
||||
name: str
|
||||
memory_total: Optional[int] = None
|
||||
available: bool = True
|
||||
|
||||
def __str__(self) -> str:
|
||||
status = "available" if self.available else "unavailable"
|
||||
mem_info = f", {self.memory_total}MB" if self.memory_total else ""
|
||||
return f"GPU[{self.index}]: {self.name}{mem_info} ({status})"
|
||||
@@ -11,6 +11,7 @@ import logging
|
||||
import shutil
|
||||
import tempfile
|
||||
import subprocess
|
||||
import threading
|
||||
from abc import ABC
|
||||
from typing import Optional, List, Dict, Any, Tuple, TYPE_CHECKING
|
||||
|
||||
@@ -75,23 +76,33 @@ def get_video_encode_args(hw_accel: str = HW_ACCEL_NONE) -> List[str]:
|
||||
]
|
||||
|
||||
|
||||
def get_hwaccel_decode_args(hw_accel: str = HW_ACCEL_NONE) -> List[str]:
|
||||
def get_hwaccel_decode_args(hw_accel: str = HW_ACCEL_NONE, device_index: Optional[int] = None) -> List[str]:
|
||||
"""
|
||||
获取硬件加速解码参数(输入文件之前使用)
|
||||
|
||||
Args:
|
||||
hw_accel: 硬件加速类型 (none, qsv, cuda)
|
||||
device_index: GPU 设备索引,用于多显卡调度
|
||||
|
||||
Returns:
|
||||
FFmpeg 硬件加速解码参数列表
|
||||
"""
|
||||
if hw_accel == HW_ACCEL_CUDA:
|
||||
# CUDA 硬件加速解码
|
||||
# 注意:使用 cuda 作为 hwaccel,但输出到系统内存以便 CPU 滤镜处理
|
||||
return ['-hwaccel', 'cuda', '-hwaccel_output_format', 'cuda']
|
||||
args = ['-hwaccel', 'cuda']
|
||||
# 多显卡模式下指定设备
|
||||
if device_index is not None:
|
||||
args.extend(['-hwaccel_device', str(device_index)])
|
||||
args.extend(['-hwaccel_output_format', 'cuda'])
|
||||
return args
|
||||
elif hw_accel == HW_ACCEL_QSV:
|
||||
# QSV 硬件加速解码
|
||||
return ['-hwaccel', 'qsv', '-hwaccel_output_format', 'qsv']
|
||||
args = ['-hwaccel', 'qsv']
|
||||
# QSV 在 Windows 上使用 -qsv_device
|
||||
if device_index is not None:
|
||||
args.extend(['-qsv_device', str(device_index)])
|
||||
args.extend(['-hwaccel_output_format', 'qsv'])
|
||||
return args
|
||||
else:
|
||||
return []
|
||||
|
||||
@@ -103,6 +114,10 @@ def get_hwaccel_filter_prefix(hw_accel: str = HW_ACCEL_NONE) -> str:
|
||||
注意:由于大多数复杂滤镜(如 lut3d, overlay, crop 等)不支持硬件表面,
|
||||
我们需要在滤镜链开始时将硬件表面下载到系统内存。
|
||||
|
||||
CUDA/QSV hwdownload 只支持 nv12 格式输出,因此需要两步转换:
|
||||
1. hwdownload,format=nv12 - 从 GPU 下载到 CPU
|
||||
2. format=yuv420p - 转换为标准格式(确保与 RGBA/YUVA overlay 混合时颜色正确)
|
||||
|
||||
Args:
|
||||
hw_accel: 硬件加速类型
|
||||
|
||||
@@ -110,9 +125,9 @@ def get_hwaccel_filter_prefix(hw_accel: str = HW_ACCEL_NONE) -> str:
|
||||
需要添加到滤镜链开头的 hwdownload 滤镜字符串
|
||||
"""
|
||||
if hw_accel == HW_ACCEL_CUDA:
|
||||
return 'hwdownload,format=nv12,'
|
||||
return 'hwdownload,format=nv12,format=yuv420p,'
|
||||
elif hw_accel == HW_ACCEL_QSV:
|
||||
return 'hwdownload,format=nv12,'
|
||||
return 'hwdownload,format=nv12,format=yuv420p,'
|
||||
else:
|
||||
return ''
|
||||
|
||||
@@ -128,6 +143,8 @@ AUDIO_ENCODE_ARGS = [
|
||||
'-ac', '2',
|
||||
]
|
||||
|
||||
FFMPEG_LOGLEVEL = 'error'
|
||||
|
||||
|
||||
def subprocess_args(include_stdout: bool = True) -> Dict[str, Any]:
|
||||
"""
|
||||
@@ -248,9 +265,13 @@ class BaseHandler(TaskHandler, ABC):
|
||||
- 临时目录管理
|
||||
- 文件下载/上传
|
||||
- FFmpeg 命令执行
|
||||
- GPU 设备管理(多显卡调度)
|
||||
- 日志记录
|
||||
"""
|
||||
|
||||
# 线程本地存储:用于存储当前线程的 GPU 设备索引
|
||||
_thread_local = threading.local()
|
||||
|
||||
def __init__(self, config: WorkerConfig, api_client: 'APIClientV2'):
|
||||
"""
|
||||
初始化处理器
|
||||
@@ -267,6 +288,39 @@ class BaseHandler(TaskHandler, ABC):
|
||||
max_size_gb=config.cache_max_size_gb
|
||||
)
|
||||
|
||||
# ========== GPU 设备管理 ==========
|
||||
|
||||
def set_gpu_device(self, device_index: int) -> None:
|
||||
"""
|
||||
设置当前线程的 GPU 设备索引
|
||||
|
||||
由 TaskExecutor 在任务执行前调用。
|
||||
|
||||
Args:
|
||||
device_index: GPU 设备索引
|
||||
"""
|
||||
self._thread_local.gpu_device = device_index
|
||||
|
||||
def get_gpu_device(self) -> Optional[int]:
|
||||
"""
|
||||
获取当前线程的 GPU 设备索引
|
||||
|
||||
Returns:
|
||||
GPU 设备索引,未设置则返回 None
|
||||
"""
|
||||
return getattr(self._thread_local, 'gpu_device', None)
|
||||
|
||||
def clear_gpu_device(self) -> None:
|
||||
"""
|
||||
清除当前线程的 GPU 设备索引
|
||||
|
||||
由 TaskExecutor 在任务执行后调用。
|
||||
"""
|
||||
if hasattr(self._thread_local, 'gpu_device'):
|
||||
del self._thread_local.gpu_device
|
||||
|
||||
# ========== FFmpeg 参数生成 ==========
|
||||
|
||||
def get_video_encode_args(self) -> List[str]:
|
||||
"""
|
||||
获取当前配置的视频编码参数
|
||||
@@ -278,12 +332,13 @@ class BaseHandler(TaskHandler, ABC):
|
||||
|
||||
def get_hwaccel_decode_args(self) -> List[str]:
|
||||
"""
|
||||
获取硬件加速解码参数(在输入文件之前使用)
|
||||
获取硬件加速解码参数(支持设备指定)
|
||||
|
||||
Returns:
|
||||
FFmpeg 硬件加速解码参数列表
|
||||
"""
|
||||
return get_hwaccel_decode_args(self.config.hw_accel)
|
||||
device_index = self.get_gpu_device()
|
||||
return get_hwaccel_decode_args(self.config.hw_accel, device_index)
|
||||
|
||||
def get_hwaccel_filter_prefix(self) -> str:
|
||||
"""
|
||||
@@ -409,6 +464,11 @@ class BaseHandler(TaskHandler, ABC):
|
||||
if result:
|
||||
file_size = os.path.getsize(file_path)
|
||||
logger.info(f"[task:{task_id}] Uploaded: {file_path} ({file_size} bytes)")
|
||||
|
||||
# 将上传成功的文件加入缓存
|
||||
if access_url:
|
||||
self.material_cache.add_to_cache(access_url, file_path)
|
||||
|
||||
return access_url
|
||||
else:
|
||||
logger.error(f"[task:{task_id}] Upload failed: {file_path}")
|
||||
@@ -437,22 +497,28 @@ class BaseHandler(TaskHandler, ABC):
|
||||
if timeout is None:
|
||||
timeout = self.config.ffmpeg_timeout
|
||||
|
||||
cmd_to_run = list(cmd)
|
||||
if cmd_to_run and cmd_to_run[0] == 'ffmpeg' and '-loglevel' not in cmd_to_run:
|
||||
cmd_to_run[1:1] = ['-loglevel', FFMPEG_LOGLEVEL]
|
||||
|
||||
# 日志记录命令(限制长度)
|
||||
cmd_str = ' '.join(cmd)
|
||||
cmd_str = ' '.join(cmd_to_run)
|
||||
if len(cmd_str) > 500:
|
||||
cmd_str = cmd_str[:500] + '...'
|
||||
logger.info(f"[task:{task_id}] FFmpeg: {cmd_str}")
|
||||
|
||||
try:
|
||||
run_args = subprocess_args(False)
|
||||
run_args['stdout'] = subprocess.DEVNULL
|
||||
run_args['stderr'] = subprocess.PIPE
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
cmd_to_run,
|
||||
timeout=timeout,
|
||||
**subprocess_args(False)
|
||||
**run_args
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
stderr = result.stderr.decode('utf-8', errors='replace')[:1000]
|
||||
stderr = (result.stderr or b'').decode('utf-8', errors='replace')[:1000]
|
||||
logger.error(f"[task:{task_id}] FFmpeg failed (code={result.returncode}): {stderr}")
|
||||
return False
|
||||
|
||||
|
||||
@@ -127,9 +127,9 @@ class FinalizeMp4Handler(BaseHandler):
|
||||
concat_file = os.path.join(work_dir, 'concat.txt')
|
||||
with open(concat_file, 'w', encoding='utf-8') as f:
|
||||
for ts_file in ts_files:
|
||||
# 路径中的反斜杠需要转义或使用正斜杠
|
||||
ts_path = ts_file.replace('\\', '/')
|
||||
f.write(f"file '{ts_path}'\n")
|
||||
# FFmpeg concat 路径相对于 concat.txt 所在目录,只需写文件名
|
||||
ts_filename = os.path.basename(ts_file)
|
||||
f.write(f"file '{ts_filename}'\n")
|
||||
|
||||
# 3. 构建合并命令(remux,不重编码)
|
||||
cmd = [
|
||||
|
||||
@@ -123,21 +123,48 @@ class RenderSegmentVideoHandler(BaseHandler):
|
||||
overlay_file = None
|
||||
if render_spec.overlay_url:
|
||||
# 根据 URL 后缀确定文件扩展名
|
||||
ext = '.png'
|
||||
if render_spec.overlay_url.lower().endswith('.jpg') or render_spec.overlay_url.lower().endswith('.jpeg'):
|
||||
url_lower = render_spec.overlay_url.lower()
|
||||
if url_lower.endswith('.jpg') or url_lower.endswith('.jpeg'):
|
||||
ext = '.jpg'
|
||||
elif url_lower.endswith('.mov'):
|
||||
ext = '.mov'
|
||||
else:
|
||||
ext = '.png' # 默认
|
||||
overlay_file = os.path.join(work_dir, f'overlay{ext}')
|
||||
if not self.download_file(render_spec.overlay_url, overlay_file):
|
||||
logger.warning(f"[task:{task.task_id}] Failed to download overlay, continuing without it")
|
||||
overlay_file = None
|
||||
|
||||
# 6. 计算 overlap 时长(用于转场帧冻结)
|
||||
# 6. 探测源视频时长(仅对视频素材)
|
||||
# 用于检测时长不足并通过冻结最后一帧补足
|
||||
source_duration_sec = None
|
||||
if not is_image:
|
||||
source_duration = self.probe_duration(input_file)
|
||||
if source_duration:
|
||||
source_duration_sec = source_duration
|
||||
speed = float(render_spec.speed) if render_spec.speed else 1.0
|
||||
if speed > 0:
|
||||
# 计算变速后的有效时长
|
||||
effective_duration_sec = source_duration_sec / speed
|
||||
required_duration_sec = duration_ms / 1000.0
|
||||
|
||||
# 如果源视频时长不足,记录日志
|
||||
if effective_duration_sec < required_duration_sec:
|
||||
shortage_sec = required_duration_sec - effective_duration_sec
|
||||
logger.warning(
|
||||
f"[task:{task.task_id}] Source video duration insufficient: "
|
||||
f"effective={effective_duration_sec:.2f}s (speed={speed}), "
|
||||
f"required={required_duration_sec:.2f}s, "
|
||||
f"will freeze last frame for {shortage_sec:.2f}s"
|
||||
)
|
||||
|
||||
# 7. 计算 overlap 时长(用于转场帧冻结)
|
||||
# 头部 overlap: 来自前一片段的出场转场
|
||||
overlap_head_ms = task.get_overlap_head_ms()
|
||||
# 尾部 overlap: 当前片段的出场转场
|
||||
overlap_tail_ms = task.get_overlap_tail_ms_v2()
|
||||
|
||||
# 7. 构建 FFmpeg 命令
|
||||
# 8. 构建 FFmpeg 命令
|
||||
output_file = os.path.join(work_dir, 'output.mp4')
|
||||
cmd = self._build_command(
|
||||
input_file=input_file,
|
||||
@@ -148,28 +175,29 @@ class RenderSegmentVideoHandler(BaseHandler):
|
||||
lut_file=lut_file,
|
||||
overlay_file=overlay_file,
|
||||
overlap_head_ms=overlap_head_ms,
|
||||
overlap_tail_ms=overlap_tail_ms
|
||||
overlap_tail_ms=overlap_tail_ms,
|
||||
source_duration_sec=source_duration_sec
|
||||
)
|
||||
|
||||
# 8. 执行 FFmpeg
|
||||
# 9. 执行 FFmpeg
|
||||
if not self.run_ffmpeg(cmd, task.task_id):
|
||||
return TaskResult.fail(
|
||||
ErrorCode.E_FFMPEG_FAILED,
|
||||
"FFmpeg rendering failed"
|
||||
)
|
||||
|
||||
# 9. 验证输出文件
|
||||
# 10. 验证输出文件
|
||||
if not self.ensure_file_exists(output_file, min_size=4096):
|
||||
return TaskResult.fail(
|
||||
ErrorCode.E_FFMPEG_FAILED,
|
||||
"Output file is missing or too small"
|
||||
)
|
||||
|
||||
# 10. 获取实际时长
|
||||
# 11. 获取实际时长
|
||||
actual_duration = self.probe_duration(output_file)
|
||||
actual_duration_ms = int(actual_duration * 1000) if actual_duration else duration_ms
|
||||
|
||||
# 11. 上传产物
|
||||
# 12. 上传产物
|
||||
video_url = self.upload_file(task.task_id, 'video', output_file)
|
||||
if not video_url:
|
||||
return TaskResult.fail(
|
||||
@@ -177,7 +205,7 @@ class RenderSegmentVideoHandler(BaseHandler):
|
||||
"Failed to upload video"
|
||||
)
|
||||
|
||||
# 12. 构建结果(包含 overlap 信息)
|
||||
# 13. 构建结果(包含 overlap 信息)
|
||||
result_data = {
|
||||
'videoUrl': video_url,
|
||||
'actualDurationMs': actual_duration_ms,
|
||||
@@ -271,12 +299,24 @@ class RenderSegmentVideoHandler(BaseHandler):
|
||||
|
||||
cmd.extend(['-vf', ','.join(filters)])
|
||||
|
||||
# 计算总帧数,动态调整 GOP
|
||||
total_frames = int(actual_duration_sec * fps)
|
||||
if total_frames <= 1:
|
||||
gop_size = 1
|
||||
elif total_frames < fps:
|
||||
gop_size = total_frames
|
||||
else:
|
||||
gop_size = fps * 2 # 正常情况,2 秒一个关键帧
|
||||
|
||||
# 编码参数
|
||||
cmd.extend([
|
||||
'-c:v', 'libx264',
|
||||
'-preset', 'fast',
|
||||
'-crf', '18',
|
||||
'-r', str(fps),
|
||||
'-g', str(gop_size),
|
||||
'-keyint_min', str(min(gop_size, fps // 2 or 1)),
|
||||
'-force_key_frames', 'expr:eq(n,0)',
|
||||
'-an', # 无音频
|
||||
output_file
|
||||
])
|
||||
@@ -294,7 +334,8 @@ class RenderSegmentVideoHandler(BaseHandler):
|
||||
lut_file: Optional[str] = None,
|
||||
overlay_file: Optional[str] = None,
|
||||
overlap_head_ms: int = 0,
|
||||
overlap_tail_ms: int = 0
|
||||
overlap_tail_ms: int = 0,
|
||||
source_duration_sec: Optional[float] = None
|
||||
) -> List[str]:
|
||||
"""
|
||||
构建 FFmpeg 渲染命令
|
||||
@@ -309,6 +350,7 @@ class RenderSegmentVideoHandler(BaseHandler):
|
||||
overlay_file: 叠加层文件路径(可选)
|
||||
overlap_head_ms: 头部 overlap 时长(毫秒)
|
||||
overlap_tail_ms: 尾部 overlap 时长(毫秒)
|
||||
source_duration_sec: 源视频实际时长(秒),用于检测时长不足
|
||||
|
||||
Returns:
|
||||
FFmpeg 命令参数列表
|
||||
@@ -331,10 +373,12 @@ class RenderSegmentVideoHandler(BaseHandler):
|
||||
filters = self._build_video_filters(
|
||||
render_spec=render_spec,
|
||||
output_spec=output_spec,
|
||||
duration_ms=duration_ms,
|
||||
lut_file=lut_file,
|
||||
has_overlay=overlay_file is not None,
|
||||
overlay_file=overlay_file,
|
||||
overlap_head_ms=overlap_head_ms,
|
||||
overlap_tail_ms=overlap_tail_ms
|
||||
overlap_tail_ms=overlap_tail_ms,
|
||||
source_duration_sec=source_duration_sec
|
||||
)
|
||||
|
||||
# 应用滤镜
|
||||
@@ -353,16 +397,28 @@ class RenderSegmentVideoHandler(BaseHandler):
|
||||
fps = output_spec.fps
|
||||
cmd.extend(['-r', str(fps)])
|
||||
|
||||
# GOP 大小(关键帧间隔)
|
||||
gop_size = fps * 2 # 2秒一个关键帧
|
||||
cmd.extend(['-g', str(gop_size)])
|
||||
cmd.extend(['-keyint_min', str(gop_size)])
|
||||
|
||||
# 时长(包含 overlap 区域)
|
||||
total_duration_ms = duration_ms + overlap_head_ms + overlap_tail_ms
|
||||
duration_sec = total_duration_ms / 1000.0
|
||||
cmd.extend(['-t', str(duration_sec)])
|
||||
|
||||
# 动态调整 GOP 大小:对于短视频,GOP 不能大于总帧数
|
||||
total_frames = int(duration_sec * fps)
|
||||
if total_frames <= 1:
|
||||
gop_size = 1
|
||||
elif total_frames < fps:
|
||||
# 短于 1 秒的视频,使用全部帧数作为 GOP(整个视频只有开头一个关键帧)
|
||||
gop_size = total_frames
|
||||
else:
|
||||
# 正常情况,2 秒一个关键帧
|
||||
gop_size = fps * 2
|
||||
|
||||
cmd.extend(['-g', str(gop_size)])
|
||||
cmd.extend(['-keyint_min', str(min(gop_size, fps // 2 or 1))])
|
||||
|
||||
# 强制第一帧为关键帧
|
||||
cmd.extend(['-force_key_frames', 'expr:eq(n,0)'])
|
||||
|
||||
# 无音频(视频片段不包含音频)
|
||||
cmd.append('-an')
|
||||
|
||||
@@ -375,10 +431,12 @@ class RenderSegmentVideoHandler(BaseHandler):
|
||||
self,
|
||||
render_spec: RenderSpec,
|
||||
output_spec: OutputSpec,
|
||||
duration_ms: int,
|
||||
lut_file: Optional[str] = None,
|
||||
has_overlay: bool = False,
|
||||
overlay_file: Optional[str] = None,
|
||||
overlap_head_ms: int = 0,
|
||||
overlap_tail_ms: int = 0
|
||||
overlap_tail_ms: int = 0,
|
||||
source_duration_sec: Optional[float] = None
|
||||
) -> str:
|
||||
"""
|
||||
构建视频滤镜链
|
||||
@@ -386,10 +444,12 @@ class RenderSegmentVideoHandler(BaseHandler):
|
||||
Args:
|
||||
render_spec: 渲染规格
|
||||
output_spec: 输出规格
|
||||
duration_ms: 目标时长(毫秒)
|
||||
lut_file: LUT 文件路径
|
||||
has_overlay: 是否有叠加层
|
||||
overlay_file: 叠加层文件路径(支持图片 png/jpg 和视频 mov)
|
||||
overlap_head_ms: 头部 overlap 时长(毫秒)
|
||||
overlap_tail_ms: 尾部 overlap 时长(毫秒)
|
||||
source_duration_sec: 源视频实际时长(秒),用于检测时长不足
|
||||
|
||||
Returns:
|
||||
滤镜字符串
|
||||
@@ -399,6 +459,10 @@ class RenderSegmentVideoHandler(BaseHandler):
|
||||
height = output_spec.height
|
||||
fps = output_spec.fps
|
||||
|
||||
# 判断 overlay 类型
|
||||
has_overlay = overlay_file is not None
|
||||
is_video_overlay = has_overlay and overlay_file.lower().endswith('.mov')
|
||||
|
||||
# 解析 effects
|
||||
effects = render_spec.get_effects()
|
||||
has_camera_shot = any(e.effect_type == 'cameraShot' for e in effects)
|
||||
@@ -458,23 +522,45 @@ class RenderSegmentVideoHandler(BaseHandler):
|
||||
base_filters=filters,
|
||||
effects=effects,
|
||||
fps=fps,
|
||||
width=width,
|
||||
height=height,
|
||||
has_overlay=has_overlay,
|
||||
is_video_overlay=is_video_overlay,
|
||||
overlap_head_ms=overlap_head_ms,
|
||||
overlap_tail_ms=overlap_tail_ms,
|
||||
use_hwdownload=bool(hwaccel_prefix)
|
||||
use_hwdownload=bool(hwaccel_prefix),
|
||||
duration_ms=duration_ms,
|
||||
render_spec=render_spec,
|
||||
source_duration_sec=source_duration_sec
|
||||
)
|
||||
|
||||
# 6. 帧冻结(tpad)- 用于转场 overlap 区域
|
||||
# 6. 帧冻结(tpad)- 用于转场 overlap 区域和时长不足补足
|
||||
# 注意:tpad 必须在缩放之后应用
|
||||
tpad_parts = []
|
||||
|
||||
# 计算是否需要额外的尾部冻结(源视频时长不足)
|
||||
extra_tail_freeze_sec = 0.0
|
||||
if source_duration_sec is not None:
|
||||
speed = float(render_spec.speed) if render_spec.speed else 1.0
|
||||
if speed > 0:
|
||||
# 计算变速后的有效时长
|
||||
effective_duration_sec = source_duration_sec / speed
|
||||
required_duration_sec = duration_ms / 1000.0
|
||||
|
||||
# 如果源视频时长不足,需要冻结最后一帧来补足
|
||||
if effective_duration_sec < required_duration_sec:
|
||||
extra_tail_freeze_sec = required_duration_sec - effective_duration_sec
|
||||
|
||||
if overlap_head_ms > 0:
|
||||
# 头部冻结:将第一帧冻结指定时长
|
||||
head_duration_sec = overlap_head_ms / 1000.0
|
||||
tpad_parts.append(f"start_mode=clone:start_duration={head_duration_sec}")
|
||||
if overlap_tail_ms > 0:
|
||||
# 尾部冻结:将最后一帧冻结指定时长
|
||||
tail_duration_sec = overlap_tail_ms / 1000.0
|
||||
tpad_parts.append(f"stop_mode=clone:stop_duration={tail_duration_sec}")
|
||||
|
||||
# 尾部冻结:合并 overlap 和时长不足的冻结
|
||||
total_tail_freeze_sec = (overlap_tail_ms / 1000.0) + extra_tail_freeze_sec
|
||||
if total_tail_freeze_sec > 0:
|
||||
# 将最后一帧冻结指定时长
|
||||
tpad_parts.append(f"stop_mode=clone:stop_duration={total_tail_freeze_sec}")
|
||||
|
||||
if tpad_parts:
|
||||
filters.append(f"tpad={':'.join(tpad_parts)}")
|
||||
@@ -483,7 +569,13 @@ class RenderSegmentVideoHandler(BaseHandler):
|
||||
if has_overlay:
|
||||
# 使用 filter_complex 格式
|
||||
base_filters = ','.join(filters) if filters else 'copy'
|
||||
return f"[0:v]{base_filters}[base];[base][1:v]overlay=0:0"
|
||||
overlay_scale = f"scale={width}:{height}"
|
||||
# 视频 overlay 使用 eof_action=pass(结束后消失),图片 overlay 使用默认行为(保持显示)
|
||||
overlay_params = 'eof_action=pass' if is_video_overlay else ''
|
||||
overlay_filter = f"overlay=0:0:{overlay_params}" if overlay_params else 'overlay=0:0'
|
||||
# 视频 overlay 需要在末尾统一颜色范围,避免 overlay 结束后 range 从 tv 变为 pc
|
||||
range_fix = ',format=yuv420p,setrange=tv' if is_video_overlay else ''
|
||||
return f"[0:v]{base_filters}[base];[1:v]{overlay_scale}[overlay];[base][overlay]{overlay_filter}{range_fix}"
|
||||
else:
|
||||
return ','.join(filters) if filters else ''
|
||||
|
||||
@@ -492,10 +584,16 @@ class RenderSegmentVideoHandler(BaseHandler):
|
||||
base_filters: List[str],
|
||||
effects: List[Effect],
|
||||
fps: int,
|
||||
width: int,
|
||||
height: int,
|
||||
has_overlay: bool = False,
|
||||
is_video_overlay: bool = False,
|
||||
overlap_head_ms: int = 0,
|
||||
overlap_tail_ms: int = 0,
|
||||
use_hwdownload: bool = False
|
||||
use_hwdownload: bool = False,
|
||||
duration_ms: int = 0,
|
||||
render_spec: Optional[RenderSpec] = None,
|
||||
source_duration_sec: Optional[float] = None
|
||||
) -> str:
|
||||
"""
|
||||
构建包含特效的 filter_complex 滤镜图
|
||||
@@ -506,10 +604,16 @@ class RenderSegmentVideoHandler(BaseHandler):
|
||||
base_filters: 基础滤镜列表
|
||||
effects: 特效列表
|
||||
fps: 帧率
|
||||
width: 输出宽度
|
||||
height: 输出高度
|
||||
has_overlay: 是否有叠加层
|
||||
is_video_overlay: 叠加层是否为视频格式(如 .mov)
|
||||
overlap_head_ms: 头部 overlap 时长
|
||||
overlap_tail_ms: 尾部 overlap 时长
|
||||
use_hwdownload: 是否使用了硬件加速解码(已在 base_filters 中包含 hwdownload)
|
||||
duration_ms: 目标时长(毫秒)
|
||||
render_spec: 渲染规格(用于获取变速参数)
|
||||
source_duration_sec: 源视频实际时长(秒),用于检测时长不足
|
||||
|
||||
Returns:
|
||||
filter_complex 格式的滤镜字符串
|
||||
@@ -531,15 +635,16 @@ class RenderSegmentVideoHandler(BaseHandler):
|
||||
if start_sec <= 0 or duration_sec <= 0:
|
||||
continue
|
||||
|
||||
# cameraShot 实现:
|
||||
# cameraShot 实现(定格效果):
|
||||
# 1. fps + split 分割
|
||||
# 2. 第一路:trim(0, start+duration) + freezeframes
|
||||
# 2. 第一路:trim(0, start) + tpad冻结duration秒
|
||||
# 3. 第二路:trim(start, end)
|
||||
# 4. concat 拼接
|
||||
|
||||
start_frame = start_sec * fps
|
||||
split_out_a = f'[eff{effect_idx}_a]'
|
||||
split_out_b = f'[eff{effect_idx}_b]'
|
||||
frozen_out = f'[eff{effect_idx}_frozen]'
|
||||
rest_out = f'[eff{effect_idx}_rest]'
|
||||
effect_output = f'[v_eff{effect_idx}]'
|
||||
|
||||
# fps + split
|
||||
@@ -547,37 +652,50 @@ class RenderSegmentVideoHandler(BaseHandler):
|
||||
f"{current_output}fps=fps={fps},split{split_out_a}{split_out_b}"
|
||||
)
|
||||
|
||||
# 第一路:trim + freezeframes(在 start 帧处冻结 duration 秒)
|
||||
# freezeframes: 从 first 帧开始,用 replace 帧替换后续帧
|
||||
# 这样实现定格效果:在 start_frame 位置冻结
|
||||
# 第一路:trim(0, start) + tpad冻结
|
||||
# tpad=stop_mode=clone 将最后一帧冻结指定时长
|
||||
filter_parts.append(
|
||||
f"{split_out_a}trim=start=0:end={start_sec + duration_sec},"
|
||||
f"setpts=PTS-STARTPTS,"
|
||||
f"freezeframes=first={start_frame}:last={start_frame + duration_sec * fps - 1}:replace={start_frame}"
|
||||
f"{split_out_a}"
|
||||
f"{split_out_a}trim=start=0:end={start_sec},setpts=PTS-STARTPTS,"
|
||||
f"tpad=stop_mode=clone:stop_duration={duration_sec}{frozen_out}"
|
||||
)
|
||||
|
||||
# 第二路:trim 从 start 开始
|
||||
filter_parts.append(
|
||||
f"{split_out_b}trim=start={start_sec},setpts=PTS-STARTPTS{split_out_b}"
|
||||
f"{split_out_b}trim=start={start_sec},setpts=PTS-STARTPTS{rest_out}"
|
||||
)
|
||||
|
||||
# concat 拼接
|
||||
filter_parts.append(
|
||||
f"{split_out_a}{split_out_b}concat=n=2:v=1:a=0{effect_output}"
|
||||
f"{frozen_out}{rest_out}concat=n=2:v=1:a=0{effect_output}"
|
||||
)
|
||||
|
||||
current_output = effect_output
|
||||
effect_idx += 1
|
||||
|
||||
# 帧冻结(tpad)- 用于转场 overlap 区域
|
||||
# 帧冻结(tpad)- 用于转场 overlap 区域和时长不足补足
|
||||
tpad_parts = []
|
||||
|
||||
# 计算是否需要额外的尾部冻结(源视频时长不足)
|
||||
extra_tail_freeze_sec = 0.0
|
||||
if source_duration_sec is not None and render_spec is not None and duration_ms > 0:
|
||||
speed = float(render_spec.speed) if render_spec.speed else 1.0
|
||||
if speed > 0:
|
||||
# 计算变速后的有效时长
|
||||
effective_duration_sec = source_duration_sec / speed
|
||||
required_duration_sec = duration_ms / 1000.0
|
||||
|
||||
# 如果源视频时长不足,需要冻结最后一帧来补足
|
||||
if effective_duration_sec < required_duration_sec:
|
||||
extra_tail_freeze_sec = required_duration_sec - effective_duration_sec
|
||||
|
||||
if overlap_head_ms > 0:
|
||||
head_duration_sec = overlap_head_ms / 1000.0
|
||||
tpad_parts.append(f"start_mode=clone:start_duration={head_duration_sec}")
|
||||
if overlap_tail_ms > 0:
|
||||
tail_duration_sec = overlap_tail_ms / 1000.0
|
||||
tpad_parts.append(f"stop_mode=clone:stop_duration={tail_duration_sec}")
|
||||
|
||||
# 尾部冻结:合并 overlap 和时长不足的冻结
|
||||
total_tail_freeze_sec = (overlap_tail_ms / 1000.0) + extra_tail_freeze_sec
|
||||
if total_tail_freeze_sec > 0:
|
||||
tpad_parts.append(f"stop_mode=clone:stop_duration={total_tail_freeze_sec}")
|
||||
|
||||
if tpad_parts:
|
||||
tpad_output = '[v_tpad]'
|
||||
@@ -587,7 +705,15 @@ class RenderSegmentVideoHandler(BaseHandler):
|
||||
# 最终输出
|
||||
if has_overlay:
|
||||
# 叠加层处理
|
||||
filter_parts.append(f"{current_output}[1:v]overlay=0:0")
|
||||
# 视频 overlay 使用 eof_action=pass(结束后消失),图片 overlay 使用默认行为(保持显示)
|
||||
overlay_params = 'eof_action=pass' if is_video_overlay else ''
|
||||
overlay_filter = f"overlay=0:0:{overlay_params}" if overlay_params else 'overlay=0:0'
|
||||
overlay_scale = f"scale={width}:{height}"
|
||||
overlay_output = '[v_overlay]'
|
||||
# 视频 overlay 需要在末尾统一颜色范围,避免 overlay 结束后 range 从 tv 变为 pc
|
||||
range_fix = ',format=yuv420p,setrange=tv' if is_video_overlay else ''
|
||||
filter_parts.append(f"[1:v]{overlay_scale}{overlay_output}")
|
||||
filter_parts.append(f"{current_output}{overlay_output}{overlay_filter}{range_fix}")
|
||||
else:
|
||||
# 移除最后一个标签,直接输出
|
||||
# 将最后一个滤镜的输出标签替换为空(直接输出)
|
||||
|
||||
54
index.py
54
index.py
@@ -25,6 +25,8 @@ import sys
|
||||
import time
|
||||
import signal
|
||||
import logging
|
||||
import os
|
||||
from logging.handlers import RotatingFileHandler
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
@@ -34,11 +36,55 @@ from services.task_executor import TaskExecutor
|
||||
from constant import SOFTWARE_VERSION
|
||||
|
||||
# 日志配置
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s',
|
||||
datefmt='%Y-%m-%d %H:%M:%S'
|
||||
def setup_logging():
|
||||
"""配置日志系统,输出到控制台和文件"""
|
||||
# 日志格式
|
||||
log_format = '[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s'
|
||||
date_format = '%Y-%m-%d %H:%M:%S'
|
||||
formatter = logging.Formatter(log_format, date_format)
|
||||
|
||||
# 获取根logger
|
||||
root_logger = logging.getLogger()
|
||||
root_logger.setLevel(logging.INFO)
|
||||
|
||||
# 清除已有的handlers(避免重复)
|
||||
root_logger.handlers.clear()
|
||||
|
||||
# 1. 控制台handler(只输出WARNING及以上级别)
|
||||
console_handler = logging.StreamHandler()
|
||||
console_handler.setLevel(logging.WARNING)
|
||||
console_handler.setFormatter(formatter)
|
||||
root_logger.addHandler(console_handler)
|
||||
|
||||
# 确保日志文件所在目录存在
|
||||
log_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
# 2. 所有日志文件handler(all_log.log)
|
||||
all_log_path = os.path.join(log_dir, 'all_log.log')
|
||||
all_log_handler = RotatingFileHandler(
|
||||
all_log_path,
|
||||
maxBytes=10*1024*1024, # 10MB
|
||||
backupCount=5,
|
||||
encoding='utf-8'
|
||||
)
|
||||
all_log_handler.setLevel(logging.DEBUG) # 记录所有级别
|
||||
all_log_handler.setFormatter(formatter)
|
||||
root_logger.addHandler(all_log_handler)
|
||||
|
||||
# 3. 错误日志文件handler(error.log)
|
||||
error_log_path = os.path.join(log_dir, 'error.log')
|
||||
error_log_handler = RotatingFileHandler(
|
||||
error_log_path,
|
||||
maxBytes=10*1024*1024, # 10MB
|
||||
backupCount=5,
|
||||
encoding='utf-8'
|
||||
)
|
||||
error_log_handler.setLevel(logging.ERROR) # 只记录ERROR及以上
|
||||
error_log_handler.setFormatter(formatter)
|
||||
root_logger.addHandler(error_log_handler)
|
||||
|
||||
# 初始化日志系统
|
||||
setup_logging()
|
||||
logger = logging.getLogger('worker')
|
||||
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ v2 API 客户端
|
||||
|
||||
import logging
|
||||
import subprocess
|
||||
import time
|
||||
import requests
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
@@ -24,6 +25,8 @@ class APIClientV2:
|
||||
负责与渲染服务端的所有 HTTP 通信。
|
||||
"""
|
||||
|
||||
SYSTEM_INFO_TTL_SECONDS = 30
|
||||
|
||||
def __init__(self, config: WorkerConfig):
|
||||
"""
|
||||
初始化 API 客户端
|
||||
@@ -37,6 +40,15 @@ class APIClientV2:
|
||||
self.worker_id = config.worker_id
|
||||
self.session = requests.Session()
|
||||
|
||||
self._ffmpeg_version: Optional[str] = None
|
||||
self._codec_info: Optional[str] = None
|
||||
self._hw_accel_info: Optional[str] = None
|
||||
self._gpu_info: Optional[str] = None
|
||||
self._gpu_info_checked = False
|
||||
self._static_system_info: Optional[Dict[str, Any]] = None
|
||||
self._system_info_cache: Optional[Dict[str, Any]] = None
|
||||
self._system_info_cache_ts = 0.0
|
||||
|
||||
# 设置默认请求头
|
||||
self.session.headers.update({
|
||||
'Content-Type': 'application/json',
|
||||
@@ -287,6 +299,8 @@ class APIClientV2:
|
||||
|
||||
def _get_ffmpeg_version(self) -> str:
|
||||
"""获取 FFmpeg 版本"""
|
||||
if self._ffmpeg_version is not None:
|
||||
return self._ffmpeg_version
|
||||
try:
|
||||
result = subprocess.run(
|
||||
['ffmpeg', '-version'],
|
||||
@@ -299,13 +313,18 @@ class APIClientV2:
|
||||
parts = first_line.split()
|
||||
for i, part in enumerate(parts):
|
||||
if part == 'version' and i + 1 < len(parts):
|
||||
return parts[i + 1]
|
||||
return 'unknown'
|
||||
self._ffmpeg_version = parts[i + 1]
|
||||
return self._ffmpeg_version
|
||||
self._ffmpeg_version = 'unknown'
|
||||
return self._ffmpeg_version
|
||||
except Exception:
|
||||
return 'unknown'
|
||||
self._ffmpeg_version = 'unknown'
|
||||
return self._ffmpeg_version
|
||||
|
||||
def _get_codec_info(self) -> str:
|
||||
"""获取支持的编解码器信息"""
|
||||
if self._codec_info is not None:
|
||||
return self._codec_info
|
||||
try:
|
||||
result = subprocess.run(
|
||||
['ffmpeg', '-codecs'],
|
||||
@@ -324,37 +343,60 @@ class APIClientV2:
|
||||
codecs.append('aac')
|
||||
if 'libfdk_aac' in output:
|
||||
codecs.append('libfdk_aac')
|
||||
return ', '.join(codecs) if codecs else 'unknown'
|
||||
self._codec_info = ', '.join(codecs) if codecs else 'unknown'
|
||||
return self._codec_info
|
||||
except Exception:
|
||||
return 'unknown'
|
||||
self._codec_info = 'unknown'
|
||||
return self._codec_info
|
||||
|
||||
def _get_system_info(self) -> Dict[str, Any]:
|
||||
"""获取系统信息"""
|
||||
try:
|
||||
now = time.monotonic()
|
||||
if (
|
||||
self._system_info_cache
|
||||
and now - self._system_info_cache_ts < self.SYSTEM_INFO_TTL_SECONDS
|
||||
):
|
||||
return self._system_info_cache
|
||||
|
||||
import platform
|
||||
import psutil
|
||||
|
||||
info = {
|
||||
if self._hw_accel_info is None:
|
||||
self._hw_accel_info = get_hw_accel_info_str()
|
||||
|
||||
if self._static_system_info is None:
|
||||
self._static_system_info = {
|
||||
'os': platform.system(),
|
||||
'cpu': f"{psutil.cpu_count()} cores",
|
||||
'memory': f"{psutil.virtual_memory().total // (1024**3)}GB",
|
||||
'hwAccelConfig': self.config.hw_accel, # 当前配置的硬件加速
|
||||
'hwAccelSupport': self._hw_accel_info, # 系统支持的硬件加速
|
||||
}
|
||||
|
||||
info = dict(self._static_system_info)
|
||||
info.update({
|
||||
'cpuUsage': f"{psutil.cpu_percent()}%",
|
||||
'memoryAvailable': f"{psutil.virtual_memory().available // (1024**3)}GB",
|
||||
'hwAccelConfig': self.config.hw_accel, # 当前配置的硬件加速
|
||||
'hwAccelSupport': get_hw_accel_info_str(), # 系统支持的硬件加速
|
||||
}
|
||||
})
|
||||
|
||||
# 尝试获取 GPU 信息
|
||||
gpu_info = self._get_gpu_info()
|
||||
if gpu_info:
|
||||
info['gpu'] = gpu_info
|
||||
|
||||
self._system_info_cache = info
|
||||
self._system_info_cache_ts = now
|
||||
return info
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
def _get_gpu_info(self) -> Optional[str]:
|
||||
"""获取 GPU 信息"""
|
||||
if self._gpu_info_checked:
|
||||
return self._gpu_info
|
||||
|
||||
self._gpu_info_checked = True
|
||||
try:
|
||||
result = subprocess.run(
|
||||
['nvidia-smi', '--query-gpu=name', '--format=csv,noheader'],
|
||||
@@ -364,10 +406,11 @@ class APIClientV2:
|
||||
)
|
||||
if result.returncode == 0:
|
||||
gpu_name = result.stdout.strip().split('\n')[0]
|
||||
return gpu_name
|
||||
self._gpu_info = gpu_name
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
self._gpu_info = None
|
||||
|
||||
return self._gpu_info
|
||||
|
||||
def close(self):
|
||||
"""关闭会话"""
|
||||
|
||||
@@ -5,14 +5,18 @@
|
||||
提供素材下载缓存功能,避免相同素材重复下载。
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import hashlib
|
||||
import logging
|
||||
import shutil
|
||||
import time
|
||||
import uuid
|
||||
from typing import Optional, Tuple
|
||||
from urllib.parse import urlparse, unquote
|
||||
|
||||
import psutil
|
||||
|
||||
from services import storage
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -59,6 +63,10 @@ class MaterialCache:
|
||||
负责素材文件的缓存存储和检索。
|
||||
"""
|
||||
|
||||
LOCK_TIMEOUT_SEC = 30.0
|
||||
LOCK_POLL_INTERVAL_SEC = 0.1
|
||||
LOCK_STALE_SECONDS = 24 * 60 * 60
|
||||
|
||||
def __init__(self, cache_dir: str, enabled: bool = True, max_size_gb: float = 0):
|
||||
"""
|
||||
初始化缓存管理器
|
||||
@@ -91,6 +99,134 @@ class MaterialCache:
|
||||
filename = f"{cache_key}{ext}"
|
||||
return os.path.join(self.cache_dir, filename)
|
||||
|
||||
def _get_lock_path(self, cache_key: str) -> str:
|
||||
"""获取缓存锁文件路径"""
|
||||
assert self.cache_dir
|
||||
return os.path.join(self.cache_dir, f"{cache_key}.lock")
|
||||
|
||||
def _write_lock_metadata(self, lock_fd: int, lock_path: str) -> bool:
|
||||
"""写入锁元数据,失败则清理锁文件"""
|
||||
try:
|
||||
try:
|
||||
process_start_time = psutil.Process(os.getpid()).create_time()
|
||||
except Exception as e:
|
||||
process_start_time = None
|
||||
logger.warning(f"Cache lock process start time error: {e}")
|
||||
metadata = {
|
||||
'pid': os.getpid(),
|
||||
'process_start_time': process_start_time,
|
||||
'created_at': time.time()
|
||||
}
|
||||
with os.fdopen(lock_fd, 'w', encoding='utf-8') as lock_file:
|
||||
json.dump(metadata, lock_file)
|
||||
return True
|
||||
except Exception as e:
|
||||
try:
|
||||
os.close(lock_fd)
|
||||
except Exception:
|
||||
pass
|
||||
self._remove_lock_file(lock_path, f"write metadata failed: {e}")
|
||||
return False
|
||||
|
||||
def _read_lock_metadata(self, lock_path: str) -> Optional[dict]:
|
||||
"""读取锁元数据,失败返回 None(兼容历史空锁文件)"""
|
||||
try:
|
||||
with open(lock_path, 'r', encoding='utf-8') as lock_file:
|
||||
data = json.load(lock_file)
|
||||
return data if isinstance(data, dict) else None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def _is_process_alive(self, pid: int, expected_start_time: Optional[float]) -> bool:
|
||||
"""判断进程是否存活并校验启动时间(防止 PID 复用)"""
|
||||
try:
|
||||
process = psutil.Process(pid)
|
||||
if expected_start_time is None:
|
||||
return process.is_running()
|
||||
actual_start_time = process.create_time()
|
||||
return abs(actual_start_time - expected_start_time) < 1.0
|
||||
except psutil.NoSuchProcess:
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.warning(f"Cache lock process check error: {e}")
|
||||
return True
|
||||
|
||||
def _is_lock_stale(self, lock_path: str) -> bool:
|
||||
"""判断锁是否过期(进程已退出或超过最大存活时长)"""
|
||||
if not os.path.exists(lock_path):
|
||||
return False
|
||||
now = time.time()
|
||||
metadata = self._read_lock_metadata(lock_path)
|
||||
if metadata:
|
||||
created_at = metadata.get('created_at')
|
||||
if isinstance(created_at, (int, float)) and now - created_at > self.LOCK_STALE_SECONDS:
|
||||
return True
|
||||
pid = metadata.get('pid')
|
||||
pid_value = int(pid) if isinstance(pid, int) or (isinstance(pid, str) and pid.isdigit()) else None
|
||||
expected_start_time = metadata.get('process_start_time')
|
||||
expected_start_time_value = (
|
||||
expected_start_time if isinstance(expected_start_time, (int, float)) else None
|
||||
)
|
||||
if pid_value is not None and not self._is_process_alive(pid_value, expected_start_time_value):
|
||||
return True
|
||||
return self._is_lock_stale_by_mtime(lock_path, now)
|
||||
return self._is_lock_stale_by_mtime(lock_path, now)
|
||||
|
||||
def _is_lock_stale_by_mtime(self, lock_path: str, now: float) -> bool:
|
||||
"""基于文件时间判断锁是否过期"""
|
||||
try:
|
||||
mtime = os.path.getmtime(lock_path)
|
||||
return now - mtime > self.LOCK_STALE_SECONDS
|
||||
except Exception as e:
|
||||
logger.warning(f"Cache lock stat error: {e}")
|
||||
return False
|
||||
|
||||
def _remove_lock_file(self, lock_path: str, reason: str = "") -> bool:
|
||||
"""删除锁文件"""
|
||||
try:
|
||||
os.remove(lock_path)
|
||||
if reason:
|
||||
logger.info(f"Cache lock removed: {lock_path} ({reason})")
|
||||
return True
|
||||
except FileNotFoundError:
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.warning(f"Cache lock remove error: {e}")
|
||||
return False
|
||||
|
||||
def _acquire_lock(self, cache_key: str) -> Optional[str]:
|
||||
"""获取缓存锁(跨进程安全)"""
|
||||
if not self.enabled:
|
||||
return None
|
||||
|
||||
lock_path = self._get_lock_path(cache_key)
|
||||
deadline = time.monotonic() + self.LOCK_TIMEOUT_SEC
|
||||
|
||||
while True:
|
||||
try:
|
||||
fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
|
||||
if not self._write_lock_metadata(fd, lock_path):
|
||||
return None
|
||||
return lock_path
|
||||
except FileExistsError:
|
||||
if self._is_lock_stale(lock_path):
|
||||
removed = self._remove_lock_file(lock_path, "stale lock")
|
||||
if removed:
|
||||
continue
|
||||
if time.monotonic() >= deadline:
|
||||
logger.warning(f"Cache lock timeout: {lock_path}")
|
||||
return None
|
||||
time.sleep(self.LOCK_POLL_INTERVAL_SEC)
|
||||
except Exception as e:
|
||||
logger.warning(f"Cache lock error: {e}")
|
||||
return None
|
||||
|
||||
def _release_lock(self, lock_path: Optional[str]) -> None:
|
||||
"""释放缓存锁"""
|
||||
if not lock_path:
|
||||
return
|
||||
self._remove_lock_file(lock_path)
|
||||
|
||||
def is_cached(self, url: str) -> Tuple[bool, str]:
|
||||
"""
|
||||
检查素材是否已缓存
|
||||
@@ -136,8 +272,15 @@ class MaterialCache:
|
||||
if not self.enabled:
|
||||
return storage.download_file(url, dest, max_retries=max_retries, timeout=timeout)
|
||||
|
||||
# 检查缓存
|
||||
cached, cache_path = self.is_cached(url)
|
||||
cache_key = _extract_cache_key(url)
|
||||
lock_path = self._acquire_lock(cache_key)
|
||||
if not lock_path:
|
||||
logger.warning(f"Cache lock unavailable, downloading without cache: {url[:80]}...")
|
||||
return storage.download_file(url, dest, max_retries=max_retries, timeout=timeout)
|
||||
|
||||
try:
|
||||
cache_path = self.get_cache_path(url)
|
||||
cached = os.path.exists(cache_path) and os.path.getsize(cache_path) > 0
|
||||
|
||||
if cached:
|
||||
# 命中缓存,复制到目标路径
|
||||
@@ -159,8 +302,11 @@ class MaterialCache:
|
||||
# 未命中缓存,下载到缓存目录
|
||||
logger.debug(f"Cache miss: {url[:80]}...")
|
||||
|
||||
# 先下载到临时文件
|
||||
temp_cache_path = cache_path + '.downloading'
|
||||
# 先下载到临时文件(唯一文件名,避免并发覆盖)
|
||||
temp_cache_path = os.path.join(
|
||||
self.cache_dir,
|
||||
f"{cache_key}.{uuid.uuid4().hex}.downloading"
|
||||
)
|
||||
try:
|
||||
if not storage.download_file(url, temp_cache_path, max_retries=max_retries, timeout=timeout):
|
||||
# 下载失败,清理临时文件
|
||||
@@ -168,10 +314,13 @@ class MaterialCache:
|
||||
os.remove(temp_cache_path)
|
||||
return False
|
||||
|
||||
# 下载成功,移动到正式缓存路径
|
||||
if os.path.exists(cache_path):
|
||||
os.remove(cache_path)
|
||||
os.rename(temp_cache_path, cache_path)
|
||||
if not os.path.exists(temp_cache_path) or os.path.getsize(temp_cache_path) <= 0:
|
||||
if os.path.exists(temp_cache_path):
|
||||
os.remove(temp_cache_path)
|
||||
return False
|
||||
|
||||
# 下载成功,原子替换缓存文件
|
||||
os.replace(temp_cache_path, cache_path)
|
||||
|
||||
# 复制到目标路径
|
||||
shutil.copy2(cache_path, dest)
|
||||
@@ -193,6 +342,68 @@ class MaterialCache:
|
||||
except Exception:
|
||||
pass
|
||||
return False
|
||||
finally:
|
||||
self._release_lock(lock_path)
|
||||
|
||||
def add_to_cache(self, url: str, source_path: str) -> bool:
|
||||
"""
|
||||
将本地文件添加到缓存
|
||||
|
||||
Args:
|
||||
url: 对应的 URL(用于生成缓存键)
|
||||
source_path: 本地文件路径
|
||||
|
||||
Returns:
|
||||
是否成功
|
||||
"""
|
||||
if not self.enabled:
|
||||
return False
|
||||
|
||||
if not os.path.exists(source_path):
|
||||
logger.warning(f"Source file not found for cache: {source_path}")
|
||||
return False
|
||||
|
||||
cache_key = _extract_cache_key(url)
|
||||
lock_path = self._acquire_lock(cache_key)
|
||||
if not lock_path:
|
||||
logger.warning(f"Cache lock unavailable for adding: {url[:80]}...")
|
||||
return False
|
||||
|
||||
try:
|
||||
cache_path = self.get_cache_path(url)
|
||||
|
||||
# 先复制到临时文件
|
||||
temp_cache_path = os.path.join(
|
||||
self.cache_dir,
|
||||
f"{cache_key}.{uuid.uuid4().hex}.adding"
|
||||
)
|
||||
|
||||
shutil.copy2(source_path, temp_cache_path)
|
||||
|
||||
# 原子替换
|
||||
os.replace(temp_cache_path, cache_path)
|
||||
|
||||
# 更新访问时间
|
||||
os.utime(cache_path, None)
|
||||
|
||||
logger.info(f"Added to cache: {url[:80]}... <- {source_path}")
|
||||
|
||||
# 检查清理
|
||||
if self.max_size_bytes > 0:
|
||||
self._cleanup_if_needed()
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to add to cache: {e}")
|
||||
if 'temp_cache_path' in locals() and os.path.exists(temp_cache_path):
|
||||
try:
|
||||
os.remove(temp_cache_path)
|
||||
except Exception:
|
||||
pass
|
||||
return False
|
||||
finally:
|
||||
self._release_lock(lock_path)
|
||||
|
||||
def _cleanup_if_needed(self) -> None:
|
||||
"""
|
||||
@@ -209,7 +420,7 @@ class MaterialCache:
|
||||
total_size = 0
|
||||
|
||||
for filename in os.listdir(self.cache_dir):
|
||||
if filename.endswith('.downloading'):
|
||||
if filename.endswith('.downloading') or filename.endswith('.lock'):
|
||||
continue
|
||||
file_path = os.path.join(self.cache_dir, filename)
|
||||
if os.path.isfile(file_path):
|
||||
@@ -235,6 +446,17 @@ class MaterialCache:
|
||||
for file_info in cache_files:
|
||||
if total_size <= target_size:
|
||||
break
|
||||
# 从文件名提取 cache_key,检查是否有锁(说明正在被使用)
|
||||
filename = os.path.basename(file_info['path'])
|
||||
cache_key = os.path.splitext(filename)[0]
|
||||
lock_path = self._get_lock_path(cache_key)
|
||||
if os.path.exists(lock_path):
|
||||
if self._is_lock_stale(lock_path):
|
||||
self._remove_lock_file(lock_path, "cleanup stale lock")
|
||||
else:
|
||||
# 该文件正在被其他任务使用,跳过删除
|
||||
logger.debug(f"Cache cleanup: skipping locked file {filename}")
|
||||
continue
|
||||
try:
|
||||
os.remove(file_info['path'])
|
||||
total_size -= file_info['size']
|
||||
@@ -275,7 +497,7 @@ class MaterialCache:
|
||||
total_size = 0
|
||||
|
||||
for filename in os.listdir(self.cache_dir):
|
||||
if filename.endswith('.downloading'):
|
||||
if filename.endswith('.downloading') or filename.endswith('.lock'):
|
||||
continue
|
||||
file_path = os.path.join(self.cache_dir, filename)
|
||||
if os.path.isfile(file_path):
|
||||
|
||||
164
services/gpu_scheduler.py
Normal file
164
services/gpu_scheduler.py
Normal file
@@ -0,0 +1,164 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
GPU 调度器
|
||||
|
||||
提供多 GPU 设备的轮询调度功能。
|
||||
"""
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from typing import List, Optional
|
||||
|
||||
from domain.config import WorkerConfig
|
||||
from domain.gpu import GPUDevice
|
||||
from util.system import get_all_gpu_info, validate_gpu_device
|
||||
from constant import HW_ACCEL_CUDA, HW_ACCEL_QSV
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class GPUScheduler:
|
||||
"""
|
||||
GPU 调度器
|
||||
|
||||
实现多 GPU 设备的轮询(Round Robin)调度。
|
||||
线程安全,支持并发任务执行。
|
||||
|
||||
使用方式:
|
||||
scheduler = GPUScheduler(config)
|
||||
|
||||
# 在任务执行时
|
||||
device_index = scheduler.acquire()
|
||||
try:
|
||||
# 执行任务
|
||||
pass
|
||||
finally:
|
||||
scheduler.release(device_index)
|
||||
"""
|
||||
|
||||
def __init__(self, config: WorkerConfig):
|
||||
"""
|
||||
初始化调度器
|
||||
|
||||
Args:
|
||||
config: Worker 配置
|
||||
"""
|
||||
self._config = config
|
||||
self._devices: List[GPUDevice] = []
|
||||
self._next_index: int = 0
|
||||
self._lock = threading.Lock()
|
||||
self._enabled = False
|
||||
|
||||
# 初始化设备列表
|
||||
self._init_devices()
|
||||
|
||||
def _init_devices(self) -> None:
|
||||
"""初始化 GPU 设备列表"""
|
||||
# 仅在启用硬件加速时才初始化
|
||||
if self._config.hw_accel not in (HW_ACCEL_CUDA, HW_ACCEL_QSV):
|
||||
logger.info("Hardware acceleration not enabled, GPU scheduler disabled")
|
||||
return
|
||||
|
||||
configured_devices = self._config.gpu_devices
|
||||
|
||||
if configured_devices:
|
||||
# 使用配置指定的设备
|
||||
self._devices = self._validate_configured_devices(configured_devices)
|
||||
else:
|
||||
# 自动检测所有设备
|
||||
self._devices = self._auto_detect_devices()
|
||||
|
||||
if self._devices:
|
||||
self._enabled = True
|
||||
device_info = ', '.join(str(d) for d in self._devices)
|
||||
logger.info(f"GPU scheduler initialized with {len(self._devices)} device(s): {device_info}")
|
||||
else:
|
||||
logger.warning("No GPU devices available, scheduler disabled")
|
||||
|
||||
def _validate_configured_devices(self, indices: List[int]) -> List[GPUDevice]:
|
||||
"""
|
||||
验证配置的设备列表
|
||||
|
||||
Args:
|
||||
indices: 配置的设备索引列表
|
||||
|
||||
Returns:
|
||||
验证通过的设备列表
|
||||
"""
|
||||
devices = []
|
||||
for index in indices:
|
||||
if validate_gpu_device(index):
|
||||
devices.append(GPUDevice(
|
||||
index=index,
|
||||
name=f"GPU-{index}",
|
||||
available=True
|
||||
))
|
||||
else:
|
||||
logger.warning(f"GPU device {index} is not available, skipping")
|
||||
return devices
|
||||
|
||||
def _auto_detect_devices(self) -> List[GPUDevice]:
|
||||
"""
|
||||
自动检测所有可用 GPU
|
||||
|
||||
Returns:
|
||||
检测到的设备列表
|
||||
"""
|
||||
all_devices = get_all_gpu_info()
|
||||
# 过滤不可用设备
|
||||
return [d for d in all_devices if d.available]
|
||||
|
||||
@property
|
||||
def enabled(self) -> bool:
|
||||
"""调度器是否启用"""
|
||||
return self._enabled
|
||||
|
||||
@property
|
||||
def device_count(self) -> int:
|
||||
"""设备数量"""
|
||||
return len(self._devices)
|
||||
|
||||
def acquire(self) -> Optional[int]:
|
||||
"""
|
||||
获取下一个可用的 GPU 设备(轮询调度)
|
||||
|
||||
Returns:
|
||||
GPU 设备索引,如果调度器未启用或无设备则返回 None
|
||||
"""
|
||||
if not self._enabled or not self._devices:
|
||||
return None
|
||||
|
||||
with self._lock:
|
||||
device = self._devices[self._next_index]
|
||||
self._next_index = (self._next_index + 1) % len(self._devices)
|
||||
logger.debug(f"Acquired GPU device: {device.index}")
|
||||
return device.index
|
||||
|
||||
def release(self, device_index: Optional[int]) -> None:
|
||||
"""
|
||||
释放 GPU 设备
|
||||
|
||||
当前实现为无状态轮询,此方法仅用于日志记录。
|
||||
|
||||
Args:
|
||||
device_index: 设备索引
|
||||
"""
|
||||
if device_index is not None:
|
||||
logger.debug(f"Released GPU device: {device_index}")
|
||||
|
||||
def get_status(self) -> dict:
|
||||
"""
|
||||
获取调度器状态信息
|
||||
|
||||
Returns:
|
||||
状态字典
|
||||
"""
|
||||
return {
|
||||
'enabled': self._enabled,
|
||||
'device_count': len(self._devices),
|
||||
'devices': [
|
||||
{'index': d.index, 'name': d.name, 'available': d.available}
|
||||
for d in self._devices
|
||||
],
|
||||
'hw_accel': self._config.hw_accel,
|
||||
}
|
||||
@@ -7,13 +7,38 @@
|
||||
|
||||
import os
|
||||
import logging
|
||||
import subprocess
|
||||
from typing import Optional
|
||||
from urllib.parse import unquote
|
||||
|
||||
import requests
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# 文件扩展名到 Content-Type 的映射
|
||||
_CONTENT_TYPE_MAP = {
|
||||
'.mp4': 'video/mp4',
|
||||
'.aac': 'audio/aac',
|
||||
'.ts': 'video/mp2t',
|
||||
'.m4a': 'audio/mp4',
|
||||
}
|
||||
|
||||
|
||||
def _get_content_type(file_path: str) -> str:
|
||||
"""
|
||||
根据文件扩展名获取 Content-Type
|
||||
|
||||
Args:
|
||||
file_path: 文件路径
|
||||
|
||||
Returns:
|
||||
Content-Type 字符串
|
||||
"""
|
||||
ext = os.path.splitext(file_path)[1].lower()
|
||||
return _CONTENT_TYPE_MAP.get(ext, 'application/octet-stream')
|
||||
|
||||
|
||||
def _apply_http_replace_map(url: str) -> str:
|
||||
"""
|
||||
应用 HTTP_REPLACE_MAP 环境变量替换 URL
|
||||
@@ -61,6 +86,7 @@ def upload_file(url: str, file_path: str, max_retries: int = 5, timeout: int = 6
|
||||
|
||||
# 检查是否使用 rclone 上传
|
||||
if os.getenv("UPLOAD_METHOD") == "rclone":
|
||||
logger.info(f"Uploading to: {url}")
|
||||
result = _upload_with_rclone(url, file_path)
|
||||
if result:
|
||||
return True
|
||||
@@ -68,18 +94,20 @@ def upload_file(url: str, file_path: str, max_retries: int = 5, timeout: int = 6
|
||||
|
||||
# 应用 HTTP_REPLACE_MAP 替换 URL
|
||||
http_url = _apply_http_replace_map(url)
|
||||
content_type = _get_content_type(file_path)
|
||||
logger.info(f"Uploading to: {http_url} (Content-Type: {content_type})")
|
||||
|
||||
retries = 0
|
||||
while retries < max_retries:
|
||||
try:
|
||||
with open(file_path, 'rb') as f:
|
||||
response = requests.put(
|
||||
with requests.put(
|
||||
http_url,
|
||||
data=f,
|
||||
stream=True,
|
||||
timeout=timeout,
|
||||
headers={"Content-Type": "application/octet-stream"}
|
||||
)
|
||||
headers={"Content-Type": content_type}
|
||||
) as response:
|
||||
response.raise_for_status()
|
||||
logger.info(f"Upload succeeded: {file_path}")
|
||||
return True
|
||||
@@ -111,7 +139,6 @@ def _upload_with_rclone(url: str, file_path: str) -> bool:
|
||||
return False
|
||||
|
||||
config_file = os.getenv("RCLONE_CONFIG_FILE", "")
|
||||
rclone_config = f"--config {config_file}" if config_file else ""
|
||||
|
||||
# 替换 URL
|
||||
new_url = url
|
||||
@@ -119,23 +146,35 @@ def _upload_with_rclone(url: str, file_path: str) -> bool:
|
||||
for src, dst in replace_list:
|
||||
new_url = new_url.replace(src, dst)
|
||||
new_url = new_url.split("?", 1)[0] # 移除查询参数
|
||||
new_url = unquote(new_url) # 解码 URL 编码的字符(如 %2F -> /)
|
||||
|
||||
if new_url == url:
|
||||
return False
|
||||
|
||||
cmd = (
|
||||
f"rclone copyto --no-check-dest --ignore-existing "
|
||||
f"--multi-thread-chunk-size 8M --multi-thread-streams 8 "
|
||||
f"{rclone_config} {file_path} {new_url}"
|
||||
)
|
||||
logger.debug(f"rclone command: {cmd}")
|
||||
cmd = [
|
||||
"rclone",
|
||||
"copyto",
|
||||
"--no-check-dest",
|
||||
"--ignore-existing",
|
||||
"--multi-thread-chunk-size",
|
||||
"8M",
|
||||
"--multi-thread-streams",
|
||||
"8",
|
||||
]
|
||||
if config_file:
|
||||
cmd.extend(["--config", config_file])
|
||||
cmd.extend([file_path, new_url])
|
||||
|
||||
result = os.system(cmd)
|
||||
if result == 0:
|
||||
logger.debug(f"rclone command: {' '.join(cmd)}")
|
||||
|
||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||
if result.returncode == 0:
|
||||
logger.info(f"rclone upload succeeded: {file_path}")
|
||||
return True
|
||||
|
||||
logger.warning(f"rclone upload failed (code={result}): {file_path}")
|
||||
stderr = (result.stderr or '').strip()
|
||||
stderr = stderr[:500] if stderr else ""
|
||||
logger.warning(f"rclone upload failed (code={result.returncode}): {file_path} {stderr}")
|
||||
return False
|
||||
|
||||
|
||||
@@ -177,7 +216,7 @@ def download_file(
|
||||
retries = 0
|
||||
while retries < max_retries:
|
||||
try:
|
||||
response = requests.get(http_url, timeout=timeout, stream=True)
|
||||
with requests.get(http_url, timeout=timeout, stream=True) as response:
|
||||
response.raise_for_status()
|
||||
|
||||
with open(file_path, 'wb') as f:
|
||||
|
||||
@@ -12,9 +12,16 @@ from typing import Dict, Optional, TYPE_CHECKING
|
||||
|
||||
from domain.task import Task, TaskType
|
||||
from domain.result import TaskResult, ErrorCode
|
||||
|
||||
# 需要 GPU 加速的任务类型
|
||||
GPU_REQUIRED_TASK_TYPES = {
|
||||
TaskType.RENDER_SEGMENT_VIDEO,
|
||||
TaskType.COMPOSE_TRANSITION,
|
||||
}
|
||||
from domain.config import WorkerConfig
|
||||
from core.handler import TaskHandler
|
||||
from services.lease_service import LeaseService
|
||||
from services.gpu_scheduler import GPUScheduler
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from services.api_client import APIClientV2
|
||||
@@ -60,6 +67,12 @@ class TaskExecutor:
|
||||
# 线程安全锁
|
||||
self.lock = threading.Lock()
|
||||
|
||||
# GPU 调度器(如果启用硬件加速)
|
||||
self.gpu_scheduler = GPUScheduler(config)
|
||||
|
||||
if self.gpu_scheduler.enabled:
|
||||
logger.info(f"GPU scheduler enabled with {self.gpu_scheduler.device_count} device(s)")
|
||||
|
||||
# 注册处理器
|
||||
self._register_handlers()
|
||||
|
||||
@@ -130,6 +143,14 @@ class TaskExecutor:
|
||||
logger.warning(f"[task:{task.task_id}] Task already running, skipping")
|
||||
return False
|
||||
|
||||
# 检查并发上限
|
||||
if len(self.current_tasks) >= self.config.max_concurrency:
|
||||
logger.info(
|
||||
f"[task:{task.task_id}] Max concurrency reached "
|
||||
f"({self.config.max_concurrency}), rejecting task"
|
||||
)
|
||||
return False
|
||||
|
||||
# 检查是否有对应的处理器
|
||||
if task.task_type not in self.handlers:
|
||||
logger.error(f"[task:{task.task_id}] No handler for type: {task.task_type.value}")
|
||||
@@ -164,15 +185,28 @@ class TaskExecutor:
|
||||
)
|
||||
lease_service.start()
|
||||
|
||||
# 获取 GPU 设备(仅对需要 GPU 的任务类型)
|
||||
device_index = None
|
||||
needs_gpu = task.task_type in GPU_REQUIRED_TASK_TYPES
|
||||
if needs_gpu and self.gpu_scheduler.enabled:
|
||||
device_index = self.gpu_scheduler.acquire()
|
||||
if device_index is not None:
|
||||
logger.info(f"[task:{task_id}] Assigned to GPU device {device_index}")
|
||||
|
||||
# 获取处理器(需要在设置 GPU 设备前获取)
|
||||
handler = self.handlers.get(task.task_type)
|
||||
|
||||
try:
|
||||
# 报告任务开始
|
||||
self.api_client.report_start(task_id)
|
||||
|
||||
# 获取处理器
|
||||
handler = self.handlers.get(task.task_type)
|
||||
if not handler:
|
||||
raise ValueError(f"No handler for task type: {task.task_type}")
|
||||
|
||||
# 设置 GPU 设备(线程本地存储)
|
||||
if device_index is not None:
|
||||
handler.set_gpu_device(device_index)
|
||||
|
||||
# 执行前钩子
|
||||
handler.before_handle(task)
|
||||
|
||||
@@ -196,6 +230,14 @@ class TaskExecutor:
|
||||
self.api_client.report_fail(task_id, 'E_UNKNOWN', str(e))
|
||||
|
||||
finally:
|
||||
# 清除 GPU 设备设置
|
||||
if handler:
|
||||
handler.clear_gpu_device()
|
||||
|
||||
# 释放 GPU 设备(仅当实际分配了设备时)
|
||||
if device_index is not None:
|
||||
self.gpu_scheduler.release(device_index)
|
||||
|
||||
# 停止租约续期
|
||||
lease_service.stop()
|
||||
|
||||
|
||||
15
tests/unit/test_material_cache_lock.py
Normal file
15
tests/unit/test_material_cache_lock.py
Normal file
@@ -0,0 +1,15 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
|
||||
from services.cache import MaterialCache, _extract_cache_key
|
||||
|
||||
|
||||
def test_cache_lock_acquire_release(tmp_path):
|
||||
cache = MaterialCache(cache_dir=str(tmp_path), enabled=True, max_size_gb=0)
|
||||
cache_key = _extract_cache_key("https://example.com/path/file.mp4?token=abc")
|
||||
lock_path = cache._acquire_lock(cache_key)
|
||||
assert lock_path
|
||||
assert os.path.exists(lock_path)
|
||||
cache._release_lock(lock_path)
|
||||
assert not os.path.exists(lock_path)
|
||||
@@ -5,13 +5,17 @@
|
||||
提供系统信息采集功能。
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from typing import Optional, Dict, Any
|
||||
from typing import Optional, Dict, Any, List
|
||||
|
||||
import psutil
|
||||
from constant import SOFTWARE_VERSION, DEFAULT_CAPABILITIES, HW_ACCEL_NONE, HW_ACCEL_QSV, HW_ACCEL_CUDA
|
||||
from domain.gpu import GPUDevice
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_sys_info():
|
||||
@@ -264,3 +268,78 @@ def get_hw_accel_info_str() -> str:
|
||||
return "No hardware acceleration available"
|
||||
|
||||
return ', '.join(parts) + f" [recommended: {support['recommended']}]"
|
||||
|
||||
|
||||
def get_all_gpu_info() -> List[GPUDevice]:
|
||||
"""
|
||||
获取所有 NVIDIA GPU 信息
|
||||
|
||||
使用 nvidia-smi 查询所有 GPU 设备。
|
||||
|
||||
Returns:
|
||||
GPU 设备列表,失败返回空列表
|
||||
"""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[
|
||||
'nvidia-smi',
|
||||
'--query-gpu=index,name,memory.total',
|
||||
'--format=csv,noheader,nounits'
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
return []
|
||||
|
||||
devices = []
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
if not line.strip():
|
||||
continue
|
||||
parts = [p.strip() for p in line.split(',')]
|
||||
if len(parts) >= 2:
|
||||
index = int(parts[0])
|
||||
name = parts[1]
|
||||
memory = int(parts[2]) if len(parts) >= 3 else None
|
||||
devices.append(GPUDevice(
|
||||
index=index,
|
||||
name=name,
|
||||
memory_total=memory,
|
||||
available=True
|
||||
))
|
||||
|
||||
return devices
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to detect GPUs: {e}")
|
||||
return []
|
||||
|
||||
|
||||
def validate_gpu_device(index: int) -> bool:
|
||||
"""
|
||||
验证指定索引的 GPU 设备是否可用
|
||||
|
||||
Args:
|
||||
index: GPU 设备索引
|
||||
|
||||
Returns:
|
||||
设备是否可用
|
||||
"""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[
|
||||
'nvidia-smi',
|
||||
'-i', str(index),
|
||||
'--query-gpu=name',
|
||||
'--format=csv,noheader'
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5
|
||||
)
|
||||
return result.returncode == 0 and bool(result.stdout.strip())
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
Reference in New Issue
Block a user