You've already forked FrameTour-RenderWorker
feat(tracing): 集成 OpenTelemetry 链路追踪功能
- 在 base.py 中添加文件下载、上传和 FFmpeg 执行的链路追踪
- 在 api_client.py 中实现 API 请求的链路追踪和错误标记
- 在 lease_service.py 中添加租约续期的链路追踪支持
- 在 task_executor.py 中集成任务执行的完整链路追踪
- 新增 util/tracing.py 工具模块提供统一的追踪上下文管理
- 在 .env.example 中添加 OTEL 配置选项
- 在 index.py 中初始化和关闭链路追踪功能
This commit is contained in:
151
handlers/base.py
151
handlers/base.py
@@ -15,12 +15,15 @@ import threading
|
||||
from abc import ABC
|
||||
from typing import Optional, List, Dict, Any, Tuple, TYPE_CHECKING
|
||||
|
||||
from opentelemetry.trace import SpanKind
|
||||
|
||||
from core.handler import TaskHandler
|
||||
from domain.task import Task
|
||||
from domain.result import TaskResult, ErrorCode
|
||||
from domain.config import WorkerConfig
|
||||
from services import storage
|
||||
from services.cache import MaterialCache
|
||||
from util.tracing import mark_span_error, start_span
|
||||
from constant import (
|
||||
HW_ACCEL_NONE, HW_ACCEL_QSV, HW_ACCEL_CUDA,
|
||||
VIDEO_ENCODE_PARAMS, VIDEO_ENCODE_PARAMS_QSV, VIDEO_ENCODE_PARAMS_CUDA
|
||||
@@ -410,21 +413,30 @@ class BaseHandler(TaskHandler, ABC):
|
||||
if timeout is None:
|
||||
timeout = self.config.download_timeout
|
||||
|
||||
try:
|
||||
if use_cache:
|
||||
# 使用缓存下载
|
||||
result = self.material_cache.get_or_download(url, dest, timeout=timeout)
|
||||
else:
|
||||
# 直接下载(不走缓存)
|
||||
result = storage.download_file(url, dest, timeout=timeout)
|
||||
with start_span(
|
||||
"render.task.file.download",
|
||||
kind=SpanKind.CLIENT,
|
||||
attributes={
|
||||
"render.file.destination": dest,
|
||||
"render.file.use_cache": use_cache,
|
||||
},
|
||||
) as span:
|
||||
try:
|
||||
if use_cache:
|
||||
result = self.material_cache.get_or_download(url, dest, timeout=timeout)
|
||||
else:
|
||||
result = storage.download_file(url, dest, timeout=timeout)
|
||||
|
||||
if result:
|
||||
file_size = os.path.getsize(dest) if os.path.exists(dest) else 0
|
||||
logger.debug(f"Downloaded: {url} -> {dest} ({file_size} bytes)")
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.error(f"Download failed: {url} -> {e}")
|
||||
return False
|
||||
if result:
|
||||
file_size = os.path.getsize(dest) if os.path.exists(dest) else 0
|
||||
logger.debug(f"Downloaded: {url} -> {dest} ({file_size} bytes)")
|
||||
if span is not None:
|
||||
span.set_attribute("render.file.size_bytes", file_size)
|
||||
return result
|
||||
except Exception as e:
|
||||
mark_span_error(span, str(e), ErrorCode.E_INPUT_UNAVAILABLE.value)
|
||||
logger.error(f"Download failed: {url} -> {e}")
|
||||
return False
|
||||
|
||||
def upload_file(
|
||||
self,
|
||||
@@ -445,37 +457,45 @@ class BaseHandler(TaskHandler, ABC):
|
||||
Returns:
|
||||
访问 URL,失败返回 None
|
||||
"""
|
||||
# 获取上传 URL
|
||||
upload_info = self.api_client.get_upload_url(task_id, file_type, file_name)
|
||||
if not upload_info:
|
||||
logger.error(f"[task:{task_id}] Failed to get upload URL")
|
||||
return None
|
||||
with start_span(
|
||||
"render.task.file.upload",
|
||||
kind=SpanKind.CLIENT,
|
||||
attributes={
|
||||
"render.file.type": file_type,
|
||||
"render.file.path": file_path,
|
||||
},
|
||||
) as span:
|
||||
upload_info = self.api_client.get_upload_url(task_id, file_type, file_name)
|
||||
if not upload_info:
|
||||
logger.error(f"[task:{task_id}] Failed to get upload URL")
|
||||
return None
|
||||
|
||||
upload_url = upload_info.get('uploadUrl')
|
||||
access_url = upload_info.get('accessUrl')
|
||||
upload_url = upload_info.get('uploadUrl')
|
||||
access_url = upload_info.get('accessUrl')
|
||||
|
||||
if not upload_url:
|
||||
logger.error(f"[task:{task_id}] Invalid upload URL response")
|
||||
return None
|
||||
if not upload_url:
|
||||
logger.error(f"[task:{task_id}] Invalid upload URL response")
|
||||
return None
|
||||
|
||||
# 上传文件
|
||||
try:
|
||||
result = storage.upload_file(upload_url, file_path, timeout=self.config.upload_timeout)
|
||||
if result:
|
||||
file_size = os.path.getsize(file_path)
|
||||
logger.info(f"[task:{task_id}] Uploaded: {file_path} ({file_size} bytes)")
|
||||
try:
|
||||
result = storage.upload_file(upload_url, file_path, timeout=self.config.upload_timeout)
|
||||
if result:
|
||||
file_size = os.path.getsize(file_path)
|
||||
logger.info(f"[task:{task_id}] Uploaded: {file_path} ({file_size} bytes)")
|
||||
if span is not None:
|
||||
span.set_attribute("render.file.size_bytes", file_size)
|
||||
|
||||
# 将上传成功的文件加入缓存
|
||||
if access_url:
|
||||
self.material_cache.add_to_cache(access_url, file_path)
|
||||
if access_url:
|
||||
self.material_cache.add_to_cache(access_url, file_path)
|
||||
|
||||
return access_url
|
||||
|
||||
return access_url
|
||||
else:
|
||||
logger.error(f"[task:{task_id}] Upload failed: {file_path}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"[task:{task_id}] Upload error: {e}")
|
||||
return None
|
||||
except Exception as e:
|
||||
mark_span_error(span, str(e), ErrorCode.E_UPLOAD_FAILED.value)
|
||||
logger.error(f"[task:{task_id}] Upload error: {e}")
|
||||
return None
|
||||
|
||||
def run_ffmpeg(
|
||||
self,
|
||||
@@ -507,29 +527,42 @@ class BaseHandler(TaskHandler, ABC):
|
||||
cmd_str = cmd_str[:500] + '...'
|
||||
logger.info(f"[task:{task_id}] FFmpeg: {cmd_str}")
|
||||
|
||||
try:
|
||||
run_args = subprocess_args(False)
|
||||
run_args['stdout'] = subprocess.DEVNULL
|
||||
run_args['stderr'] = subprocess.PIPE
|
||||
result = subprocess.run(
|
||||
cmd_to_run,
|
||||
timeout=timeout,
|
||||
**run_args
|
||||
)
|
||||
with start_span(
|
||||
"render.task.ffmpeg.run",
|
||||
attributes={
|
||||
"render.ffmpeg.timeout_seconds": timeout,
|
||||
"render.ffmpeg.command": cmd_str,
|
||||
},
|
||||
) as span:
|
||||
try:
|
||||
run_args = subprocess_args(False)
|
||||
run_args['stdout'] = subprocess.DEVNULL
|
||||
run_args['stderr'] = subprocess.PIPE
|
||||
result = subprocess.run(
|
||||
cmd_to_run,
|
||||
timeout=timeout,
|
||||
**run_args
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
stderr = (result.stderr or b'').decode('utf-8', errors='replace')[:1000]
|
||||
logger.error(f"[task:{task_id}] FFmpeg failed (code={result.returncode}): {stderr}")
|
||||
if span is not None:
|
||||
span.set_attribute("render.ffmpeg.return_code", result.returncode)
|
||||
|
||||
if result.returncode != 0:
|
||||
stderr = (result.stderr or b'').decode('utf-8', errors='replace')[:1000]
|
||||
logger.error(f"[task:{task_id}] FFmpeg failed (code={result.returncode}): {stderr}")
|
||||
mark_span_error(span, stderr or "ffmpeg failed", ErrorCode.E_FFMPEG_FAILED.value)
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
logger.error(f"[task:{task_id}] FFmpeg timeout after {timeout}s")
|
||||
mark_span_error(span, f"timeout after {timeout}s", ErrorCode.E_TIMEOUT.value)
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error(f"[task:{task_id}] FFmpeg error: {e}")
|
||||
mark_span_error(span, str(e), ErrorCode.E_FFMPEG_FAILED.value)
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
logger.error(f"[task:{task_id}] FFmpeg timeout after {timeout}s")
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error(f"[task:{task_id}] FFmpeg error: {e}")
|
||||
return False
|
||||
|
||||
def probe_duration(self, file_path: str) -> Optional[float]:
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user