feat(tracing): 集成 OpenTelemetry 链路追踪功能

- 在 base.py 中添加文件下载、上传和 FFmpeg 执行的链路追踪
- 在 api_client.py 中实现 API 请求的链路追踪和错误标记
- 在 lease_service.py 中添加租约续期的链路追踪支持
- 在 task_executor.py 中集成任务执行的完整链路追踪
- 新增 util/tracing.py 工具模块提供统一的追踪上下文管理
- 在 .env.example 中添加 OTEL 配置选项
- 在 index.py 中初始化和关闭链路追踪功能
This commit is contained in:
2026-02-07 00:11:01 +08:00
parent c9a6133be9
commit 9b373dea34
8 changed files with 549 additions and 149 deletions

View File

@@ -8,10 +8,13 @@
import logging
import threading
import time
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Any, Optional
if TYPE_CHECKING:
from services.api_client import APIClientV2
from util.tracing import TaskTraceContext
from util.tracing import bind_trace_context, start_span
logger = logging.getLogger(__name__)
@@ -29,7 +32,9 @@ class LeaseService:
api_client: 'APIClientV2',
task_id: str,
interval: int = 60,
extension: int = 300
extension: int = 300,
parent_otel_context: Any = None,
task_trace_context: Optional['TaskTraceContext'] = None,
):
"""
初始化租约服务
@@ -44,6 +49,8 @@ class LeaseService:
self.task_id = task_id
self.interval = interval
self.extension = extension
self.parent_otel_context = parent_otel_context
self.task_trace_context = task_trace_context
self.running = False
self.thread: threading.Thread = None
self._stop_event = threading.Event()
@@ -79,25 +86,29 @@ class LeaseService:
def _run(self):
"""Main loop of the lease-renewal thread.

Repeatedly waits on the stop event for ``self.interval`` seconds and,
if the wait timed out and the service is still running, calls
``self._extend_lease()``. The loop ends when ``self.running`` becomes
false or the stop event is set.
"""
# NOTE(review): this span looks like a rendered diff with the +/- markers
# and indentation stripped, so two versions of the loop appear back to
# back. Presumably the bare ``while`` below is the pre-commit loop and the
# ``with bind_trace_context(...)`` variant is its replacement -- confirm
# against the real file before editing.
while self.running:
# Wait for the configured interval, or until a stop signal arrives.
if self._stop_event.wait(timeout=self.interval):
# Stop signal received: leave the loop without renewing again.
break
# The whole loop runs inside bind_trace_context, which is given the parent
# OTel context and the task trace context stored on the instance.
# Presumably this makes spans created on this thread children of the
# task's trace -- verify against util/tracing.py.
with bind_trace_context(self.parent_otel_context, self.task_trace_context):
while self.running:
if self._stop_event.wait(timeout=self.interval):
break
# Re-check the running flag after the wait so that no renewal is issued
# once the service has been stopped during the interval.
if self.running:
self._extend_lease()
# Same re-check and renewal call as above (apparently the other side of
# the diff).
if self.running:
self._extend_lease()
def _extend_lease(self):
"""Perform one lease extension for the current task.

Calls ``self.api_client.extend_lease(self.task_id, self.extension)`` and
logs the outcome: a debug message when the call returns a truthy value,
a warning otherwise. Any ``Exception`` raised by the call is caught and
logged as a warning instead of being propagated.
"""
# NOTE(review): this span looks like a rendered diff with the +/- markers
# and indentation stripped. Presumably the first try/except is the
# pre-commit body and the ``with start_span(...)`` variant further down is
# its replacement -- confirm against the real file before editing.
try:
success = self.api_client.extend_lease(self.task_id, self.extension)
if success:
logger.debug(f"[task:{self.task_id}] Lease extended by {self.extension}s")
else:
logger.warning(f"[task:{self.task_id}] Failed to extend lease")
except Exception as e:
# Best-effort: a failed renewal is only logged, never raised.
logger.warning(f"[task:{self.task_id}] Lease extension error: {e}")
# Traced variant: the same call wrapped in a "render.task.lease.extend"
# span that carries the task id and the requested extension in seconds.
with start_span(
"render.task.lease.extend",
task_id=self.task_id,
attributes={"render.lease.extension_seconds": self.extension},
):
# NOTE(review): the exception is caught inside the span body, so the
# span's context manager never sees it. Whether a failed renewal gets
# marked as an error on the span cannot be determined from here -- check
# start_span in util/tracing.py.
try:
success = self.api_client.extend_lease(self.task_id, self.extension)
if success:
logger.debug(f"[task:{self.task_id}] Lease extended by {self.extension}s")
else:
logger.warning(f"[task:{self.task_id}] Failed to extend lease")
except Exception as e:
logger.warning(f"[task:{self.task_id}] Lease extension error: {e}")
def __enter__(self):
"""上下文管理器入口"""