feat(render): 实现渲染系统v2核心架构

- 添加v2支持的任务类型常量定义
- 更新软件版本至0.0.9
- 定义v2统一音视频编码参数
- 实现系统信息工具get_sys_info_v2方法
- 新增get_capabilities和_get_gpu_info功能
- 创建core模块及TaskHandler抽象基类
- 添加渲染系统设计文档包括集群架构、v2 PRD和Worker PRD
- 实现任务处理器抽象基类及接口规范
This commit is contained in:
2026-01-11 21:14:02 +08:00
parent 357c0afb3b
commit 24de32e6bb
19 changed files with 2812 additions and 3 deletions

16
services/__init__.py Normal file
View File

@@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-
"""
服务层
包含 API 客户端、任务执行器、租约服务等服务组件。
"""
from services.api_client import APIClientV2
from services.lease_service import LeaseService
from services.task_executor import TaskExecutor
__all__ = [
'APIClientV2',
'LeaseService',
'TaskExecutor',
]

371
services/api_client.py Normal file
View File

@@ -0,0 +1,371 @@
# -*- coding: utf-8 -*-
"""
v2 API 客户端
实现与渲染服务端 v2 接口的通信。
"""
import logging
import subprocess
import requests
from typing import Dict, List, Optional, Any
from domain.task import Task
from domain.config import WorkerConfig
logger = logging.getLogger(__name__)
class APIClientV2:
"""
v2 API 客户端
负责与渲染服务端的所有 HTTP 通信。
"""
def __init__(self, config: WorkerConfig):
"""
初始化 API 客户端
Args:
config: Worker 配置
"""
self.config = config
self.base_url = config.api_endpoint.rstrip('/')
self.access_key = config.access_key
self.worker_id = config.worker_id
self.session = requests.Session()
# 设置默认请求头
self.session.headers.update({
'Content-Type': 'application/json',
'Accept': 'application/json'
})
def sync(self, current_task_ids: List[str]) -> List[Task]:
"""
心跳同步并拉取任务
Args:
current_task_ids: 当前正在执行的任务 ID 列表
Returns:
List[Task]: 新分配的任务列表
"""
url = f"{self.base_url}/render/v2/worker/sync"
# 将 task_id 转换为整数(服务端期望 []int64)
task_ids_int = [int(tid) for tid in current_task_ids if tid.isdigit()]
payload = {
'accessKey': self.access_key,
'workerId': self.worker_id,
'capabilities': self.config.capabilities,
'maxConcurrency': self.config.max_concurrency,
'currentTaskCount': len(current_task_ids),
'currentTaskIds': task_ids_int,
'ffmpegVersion': self._get_ffmpeg_version(),
'codecInfo': self._get_codec_info(),
'systemInfo': self._get_system_info()
}
try:
resp = self.session.post(url, json=payload, timeout=10)
resp.raise_for_status()
data = resp.json()
if data.get('code') != 200:
logger.warning(f"Sync failed: {data.get('message')}")
return []
# 解析任务列表
tasks = []
for task_data in data.get('data', {}).get('tasks', []):
try:
task = Task.from_dict(task_data)
tasks.append(task)
except Exception as e:
logger.error(f"Failed to parse task: {e}")
if tasks:
logger.info(f"Received {len(tasks)} new tasks")
return tasks
except requests.exceptions.Timeout:
logger.warning("Sync timeout")
return []
except requests.exceptions.RequestException as e:
logger.error(f"Sync request error: {e}")
return []
except Exception as e:
logger.error(f"Sync error: {e}")
return []
def report_start(self, task_id: str) -> bool:
"""
报告任务开始
Args:
task_id: 任务 ID
Returns:
bool: 是否成功
"""
url = f"{self.base_url}/render/v2/task/{task_id}/start"
try:
resp = self.session.post(
url,
json={'workerId': self.worker_id},
timeout=10
)
if resp.status_code == 200:
logger.debug(f"[task:{task_id}] Start reported")
return True
else:
logger.warning(f"[task:{task_id}] Report start failed: {resp.status_code}")
return False
except Exception as e:
logger.error(f"[task:{task_id}] Report start error: {e}")
return False
def report_success(self, task_id: str, result: Dict[str, Any]) -> bool:
"""
报告任务成功
Args:
task_id: 任务 ID
result: 任务结果数据
Returns:
bool: 是否成功
"""
url = f"{self.base_url}/render/v2/task/{task_id}/success"
try:
resp = self.session.post(
url,
json={
'workerId': self.worker_id,
'result': result
},
timeout=10
)
if resp.status_code == 200:
logger.debug(f"[task:{task_id}] Success reported")
return True
else:
logger.warning(f"[task:{task_id}] Report success failed: {resp.status_code}")
return False
except Exception as e:
logger.error(f"[task:{task_id}] Report success error: {e}")
return False
def report_fail(self, task_id: str, error_code: str, error_message: str) -> bool:
"""
报告任务失败
Args:
task_id: 任务 ID
error_code: 错误码
error_message: 错误信息
Returns:
bool: 是否成功
"""
url = f"{self.base_url}/render/v2/task/{task_id}/fail"
try:
resp = self.session.post(
url,
json={
'workerId': self.worker_id,
'errorCode': error_code,
'errorMessage': error_message[:1000] # 限制长度
},
timeout=10
)
if resp.status_code == 200:
logger.debug(f"[task:{task_id}] Failure reported")
return True
else:
logger.warning(f"[task:{task_id}] Report fail failed: {resp.status_code}")
return False
except Exception as e:
logger.error(f"[task:{task_id}] Report fail error: {e}")
return False
def get_upload_url(self, task_id: str, file_type: str, file_name: str = None) -> Optional[Dict[str, str]]:
"""
获取上传 URL
Args:
task_id: 任务 ID
file_type: 文件类型(video/audio/ts/mp4)
file_name: 文件名(可选)
Returns:
Dict 包含 uploadUrl 和 accessUrl,失败返回 None
"""
url = f"{self.base_url}/render/v2/task/{task_id}/uploadUrl"
payload = {'fileType': file_type}
if file_name:
payload['fileName'] = file_name
try:
resp = self.session.post(url, json=payload, timeout=10)
if resp.status_code == 200:
data = resp.json()
if data.get('code') == 200:
return data.get('data')
logger.warning(f"[task:{task_id}] Get upload URL failed: {resp.status_code}")
return None
except Exception as e:
logger.error(f"[task:{task_id}] Get upload URL error: {e}")
return None
def extend_lease(self, task_id: str, extension: int = None) -> bool:
"""
延长租约
Args:
task_id: 任务 ID
extension: 延长秒数(默认使用配置值)
Returns:
bool: 是否成功
"""
if extension is None:
extension = self.config.lease_extension_duration
url = f"{self.base_url}/render/v2/task/{task_id}/extend-lease"
try:
resp = self.session.post(
url,
params={
'workerId': self.worker_id,
'extension': extension
},
timeout=10
)
if resp.status_code == 200:
logger.debug(f"[task:{task_id}] Lease extended by {extension}s")
return True
else:
logger.warning(f"[task:{task_id}] Extend lease failed: {resp.status_code}")
return False
except Exception as e:
logger.error(f"[task:{task_id}] Extend lease error: {e}")
return False
def get_task_info(self, task_id: str) -> Optional[Dict]:
"""
获取任务详情
Args:
task_id: 任务 ID
Returns:
任务详情字典,失败返回 None
"""
url = f"{self.base_url}/render/v2/task/{task_id}"
try:
resp = self.session.get(url, timeout=10)
if resp.status_code == 200:
data = resp.json()
if data.get('code') == 200:
return data.get('data')
return None
except Exception as e:
logger.error(f"[task:{task_id}] Get task info error: {e}")
return None
def _get_ffmpeg_version(self) -> str:
"""获取 FFmpeg 版本"""
try:
result = subprocess.run(
['ffmpeg', '-version'],
capture_output=True,
text=True,
timeout=5
)
first_line = result.stdout.split('\n')[0]
if 'version' in first_line:
parts = first_line.split()
for i, part in enumerate(parts):
if part == 'version' and i + 1 < len(parts):
return parts[i + 1]
return 'unknown'
except Exception:
return 'unknown'
def _get_codec_info(self) -> str:
"""获取支持的编解码器信息"""
try:
result = subprocess.run(
['ffmpeg', '-codecs'],
capture_output=True,
text=True,
timeout=5
)
# 检查常用编解码器
codecs = []
output = result.stdout
if 'libx264' in output:
codecs.append('libx264')
if 'libx265' in output or 'hevc' in output:
codecs.append('libx265')
if 'aac' in output:
codecs.append('aac')
if 'libfdk_aac' in output:
codecs.append('libfdk_aac')
return ', '.join(codecs) if codecs else 'unknown'
except Exception:
return 'unknown'
def _get_system_info(self) -> Dict[str, Any]:
"""获取系统信息"""
try:
import platform
import psutil
info = {
'os': platform.system(),
'cpu': f"{psutil.cpu_count()} cores",
'memory': f"{psutil.virtual_memory().total // (1024**3)}GB",
'cpuUsage': f"{psutil.cpu_percent()}%",
'memoryAvailable': f"{psutil.virtual_memory().available // (1024**3)}GB"
}
# 尝试获取 GPU 信息
gpu_info = self._get_gpu_info()
if gpu_info:
info['gpu'] = gpu_info
return info
except Exception:
return {}
def _get_gpu_info(self) -> Optional[str]:
"""获取 GPU 信息"""
try:
result = subprocess.run(
['nvidia-smi', '--query-gpu=name', '--format=csv,noheader'],
capture_output=True,
text=True,
timeout=5
)
if result.returncode == 0:
gpu_name = result.stdout.strip().split('\n')[0]
return gpu_name
except Exception:
pass
return None
def close(self):
"""关闭会话"""
self.session.close()

110
services/lease_service.py Normal file
View File

@@ -0,0 +1,110 @@
# -*- coding: utf-8 -*-
"""
租约续期服务
后台线程定期为正在执行的任务续期租约。
"""
import logging
import threading
import time
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from services.api_client import APIClientV2
logger = logging.getLogger(__name__)
class LeaseService:
"""
租约续期服务
在后台线程中定期调用 API 延长任务租约,
防止长时间任务因租约过期被回收。
"""
def __init__(
self,
api_client: 'APIClientV2',
task_id: str,
interval: int = 60,
extension: int = 300
):
"""
初始化租约服务
Args:
api_client: API 客户端
task_id: 任务 ID
interval: 续期间隔(秒),默认 60 秒
extension: 每次续期时长(秒),默认 300 秒
"""
self.api_client = api_client
self.task_id = task_id
self.interval = interval
self.extension = extension
self.running = False
self.thread: threading.Thread = None
self._stop_event = threading.Event()
def start(self):
"""启动租约续期线程"""
if self.running:
logger.warning(f"[task:{self.task_id}] Lease service already running")
return
self.running = True
self._stop_event.clear()
self.thread = threading.Thread(
target=self._run,
name=f"LeaseService-{self.task_id}",
daemon=True
)
self.thread.start()
logger.debug(f"[task:{self.task_id}] Lease service started (interval={self.interval}s)")
def stop(self):
"""停止租约续期线程"""
if not self.running:
return
self.running = False
self._stop_event.set()
if self.thread and self.thread.is_alive():
self.thread.join(timeout=5)
logger.debug(f"[task:{self.task_id}] Lease service stopped")
def _run(self):
"""续期线程主循环"""
while self.running:
# 等待指定间隔或收到停止信号
if self._stop_event.wait(timeout=self.interval):
# 收到停止信号
break
if self.running:
self._extend_lease()
def _extend_lease(self):
"""执行租约续期"""
try:
success = self.api_client.extend_lease(self.task_id, self.extension)
if success:
logger.debug(f"[task:{self.task_id}] Lease extended by {self.extension}s")
else:
logger.warning(f"[task:{self.task_id}] Failed to extend lease")
except Exception as e:
logger.warning(f"[task:{self.task_id}] Lease extension error: {e}")
def __enter__(self):
"""上下文管理器入口"""
self.start()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""上下文管理器出口"""
self.stop()
return False

234
services/task_executor.py Normal file
View File

@@ -0,0 +1,234 @@
# -*- coding: utf-8 -*-
"""
任务执行器
管理任务的并发执行,协调处理器、租约服务等组件。
"""
import logging
import threading
from concurrent.futures import ThreadPoolExecutor, Future
from typing import Dict, Optional, TYPE_CHECKING
from domain.task import Task, TaskType
from domain.result import TaskResult, ErrorCode
from domain.config import WorkerConfig
from core.handler import TaskHandler
from services.lease_service import LeaseService
if TYPE_CHECKING:
from services.api_client import APIClientV2
logger = logging.getLogger(__name__)
class TaskExecutor:
"""
任务执行器
负责任务的并发调度和执行,包括:
- 注册和管理任务处理器
- 维护任务执行状态
- 协调租约续期
- 上报执行结果
"""
def __init__(self, config: WorkerConfig, api_client: 'APIClientV2'):
"""
初始化任务执行器
Args:
config: Worker 配置
api_client: API 客户端
"""
self.config = config
self.api_client = api_client
# 任务处理器注册表
self.handlers: Dict[TaskType, TaskHandler] = {}
# 当前任务跟踪
self.current_tasks: Dict[str, Task] = {}
self.current_futures: Dict[str, Future] = {}
# 线程池
self.executor = ThreadPoolExecutor(
max_workers=config.max_concurrency,
thread_name_prefix="TaskWorker"
)
# 线程安全锁
self.lock = threading.Lock()
# 注册处理器
self._register_handlers()
def _register_handlers(self):
"""注册所有任务处理器"""
# 延迟导入以避免循环依赖
from handlers.render_video import RenderSegmentVideoHandler
from handlers.prepare_audio import PrepareJobAudioHandler
from handlers.package_ts import PackageSegmentTsHandler
from handlers.finalize_mp4 import FinalizeMp4Handler
handlers = [
RenderSegmentVideoHandler(self.config, self.api_client),
PrepareJobAudioHandler(self.config, self.api_client),
PackageSegmentTsHandler(self.config, self.api_client),
FinalizeMp4Handler(self.config, self.api_client),
]
for handler in handlers:
task_type = handler.get_supported_type()
self.handlers[task_type] = handler
logger.debug(f"Registered handler for {task_type.value}")
def get_current_task_ids(self) -> list:
"""
获取当前正在执行的任务 ID 列表
Returns:
任务 ID 列表
"""
with self.lock:
return list(self.current_tasks.keys())
def get_current_task_count(self) -> int:
"""
获取当前正在执行的任务数量
Returns:
任务数量
"""
with self.lock:
return len(self.current_tasks)
def can_accept_task(self) -> bool:
"""
检查是否可以接受新任务
Returns:
是否可以接受
"""
return self.get_current_task_count() < self.config.max_concurrency
def submit_task(self, task: Task) -> bool:
"""
提交任务到线程池
Args:
task: 任务实体
Returns:
是否提交成功
"""
with self.lock:
# 检查任务是否已在执行
if task.task_id in self.current_tasks:
logger.warning(f"[task:{task.task_id}] Task already running, skipping")
return False
# 检查是否有对应的处理器
if task.task_type not in self.handlers:
logger.error(f"[task:{task.task_id}] No handler for type: {task.task_type.value}")
return False
# 记录任务
self.current_tasks[task.task_id] = task
# 提交到线程池
future = self.executor.submit(self._process_task, task)
self.current_futures[task.task_id] = future
logger.info(f"[task:{task.task_id}] Submitted ({task.task_type.value})")
return True
def _process_task(self, task: Task):
"""
处理单个任务(在线程池中执行)
Args:
task: 任务实体
"""
task_id = task.task_id
logger.info(f"[task:{task_id}] Starting {task.task_type.value}")
# 启动租约续期服务
lease_service = LeaseService(
self.api_client,
task_id,
interval=self.config.lease_extension_threshold,
extension=self.config.lease_extension_duration
)
lease_service.start()
try:
# 报告任务开始
self.api_client.report_start(task_id)
# 获取处理器
handler = self.handlers.get(task.task_type)
if not handler:
raise ValueError(f"No handler for task type: {task.task_type}")
# 执行前钩子
handler.before_handle(task)
# 执行任务
result = handler.handle(task)
# 执行后钩子
handler.after_handle(task, result)
# 上报结果
if result.success:
self.api_client.report_success(task_id, result.data)
logger.info(f"[task:{task_id}] Completed successfully")
else:
error_code = result.error_code.value if result.error_code else 'E_UNKNOWN'
self.api_client.report_fail(task_id, error_code, result.error_message or '')
logger.error(f"[task:{task_id}] Failed: {result.error_message}")
except Exception as e:
logger.error(f"[task:{task_id}] Exception: {e}", exc_info=True)
self.api_client.report_fail(task_id, 'E_UNKNOWN', str(e))
finally:
# 停止租约续期
lease_service.stop()
# 从当前任务中移除
with self.lock:
self.current_tasks.pop(task_id, None)
self.current_futures.pop(task_id, None)
def shutdown(self, wait: bool = True):
"""
关闭执行器
Args:
wait: 是否等待所有任务完成
"""
logger.info("Shutting down task executor...")
# 关闭线程池
self.executor.shutdown(wait=wait)
# 清理状态
with self.lock:
self.current_tasks.clear()
self.current_futures.clear()
logger.info("Task executor shutdown complete")
def get_handler(self, task_type: TaskType) -> Optional[TaskHandler]:
"""
获取指定类型的处理器
Args:
task_type: 任务类型
Returns:
处理器实例,不存在则返回 None
"""
return self.handlers.get(task_type)