You've already forked FrameTour-RenderWorker
feat(tracing): 集成 OpenTelemetry 链路追踪功能
- 在 base.py 中添加文件下载、上传和 FFmpeg 执行的链路追踪
- 在 api_client.py 中实现 API 请求的链路追踪和错误标记
- 在 lease_service.py 中添加租约续期的链路追踪支持
- 在 task_executor.py 中集成任务执行的完整链路追踪
- 新增 util/tracing.py 工具模块提供统一的追踪上下文管理
- 在 .env.example 中添加 OTEL 配置选项
- 在 index.py 中初始化和关闭链路追踪功能
This commit is contained in:
@@ -10,10 +10,14 @@ import subprocess
|
||||
import time
|
||||
import requests
|
||||
from typing import Dict, List, Optional, Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from opentelemetry.trace import SpanKind, Status, StatusCode
|
||||
|
||||
from domain.task import Task
|
||||
from domain.config import WorkerConfig
|
||||
from util.system import get_hw_accel_info_str
|
||||
from util.tracing import inject_trace_headers, mark_span_error, start_span
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -55,6 +59,45 @@ class APIClientV2:
|
||||
'Accept': 'application/json'
|
||||
})
|
||||
|
||||
def _request_with_trace(
    self,
    method: str,
    url: str,
    *,
    task_id: Optional[str] = None,
    span_name: str = "",
    **kwargs: Any,
) -> requests.Response:
    """Send an HTTP request via ``self.session`` inside a CLIENT-kind span.

    Args:
        method: HTTP verb; recorded upper-cased on the span and lower-cased
            in the fallback span name.
        url: Full request URL; its path, hostname and (when present) port
            are recorded as span attributes.
        task_id: Optional task identifier forwarded to ``start_span``. When
            truthy, the caller's headers are routed through
            ``inject_trace_headers`` (presumably adding trace-propagation
            headers -- confirm in util.tracing).
        span_name: Explicit span name; when empty, ``render.api.<method>``
            is used instead.
        **kwargs: Remaining keyword arguments passed straight to
            ``self.session.request`` (e.g. ``json``, ``params``, ``timeout``).

    Returns:
        The response object returned by ``self.session.request``.

    Raises:
        Exception: Whatever ``self.session.request`` raises is re-raised
            after the span has been marked with ``HTTP_REQUEST_ERROR``.
    """
    # Work on a copy so the caller's kwargs mapping is never mutated.
    forwarded = {key: value for key, value in kwargs.items() if key != "headers"}
    caller_headers = kwargs.get("headers")
    if task_id:
        forwarded["headers"] = inject_trace_headers(caller_headers)
    elif caller_headers:
        # No task context: pass the caller's headers through untouched.
        forwarded["headers"] = caller_headers

    target = urlparse(url)
    span_attributes = {
        "http.request.method": method.upper(),
        "url.path": target.path,
        "server.address": target.hostname or "",
    }
    port = target.port
    if port:
        span_attributes["server.port"] = port

    resolved_name = span_name if span_name else f"render.api.{method.lower()}"

    with start_span(
        resolved_name,
        task_id=task_id,
        kind=SpanKind.CLIENT,
        attributes=span_attributes,
    ) as span:
        try:
            response = self.session.request(method=method, url=url, **forwarded)
        except Exception as exc:
            mark_span_error(span, str(exc), "HTTP_REQUEST_ERROR")
            raise

        # The span may be None, in which case there is nothing to annotate.
        if span is None:
            return response

        status = response.status_code
        span.set_attribute("http.response.status_code", status)
        if status >= 400:
            # 4xx/5xx responses are flagged as errors on the span, but the
            # response is still returned for the caller to interpret.
            span.set_status(Status(StatusCode.ERROR, f"HTTP {status}"))
        return response
|
||||
|
||||
def sync(self, current_task_ids: List[str]) -> List[Task]:
|
||||
"""
|
||||
心跳同步并拉取任务
|
||||
@@ -128,10 +171,13 @@ class APIClientV2:
|
||||
url = f"{self.base_url}/render/v2/task/{task_id}/start"
|
||||
|
||||
try:
|
||||
resp = self.session.post(
|
||||
url,
|
||||
resp = self._request_with_trace(
|
||||
method="POST",
|
||||
url=url,
|
||||
task_id=task_id,
|
||||
span_name="render.task.api.report_start",
|
||||
json={'workerId': self.worker_id},
|
||||
timeout=10
|
||||
timeout=10,
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
logger.debug(f"[task:{task_id}] Start reported")
|
||||
@@ -157,13 +203,16 @@ class APIClientV2:
|
||||
url = f"{self.base_url}/render/v2/task/{task_id}/success"
|
||||
|
||||
try:
|
||||
resp = self.session.post(
|
||||
url,
|
||||
resp = self._request_with_trace(
|
||||
method="POST",
|
||||
url=url,
|
||||
task_id=task_id,
|
||||
span_name="render.task.api.report_success",
|
||||
json={
|
||||
'workerId': self.worker_id,
|
||||
'result': result
|
||||
},
|
||||
timeout=10
|
||||
timeout=10,
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
logger.debug(f"[task:{task_id}] Success reported")
|
||||
@@ -190,14 +239,17 @@ class APIClientV2:
|
||||
url = f"{self.base_url}/render/v2/task/{task_id}/fail"
|
||||
|
||||
try:
|
||||
resp = self.session.post(
|
||||
url,
|
||||
resp = self._request_with_trace(
|
||||
method="POST",
|
||||
url=url,
|
||||
task_id=task_id,
|
||||
span_name="render.task.api.report_fail",
|
||||
json={
|
||||
'workerId': self.worker_id,
|
||||
'errorCode': error_code,
|
||||
'errorMessage': error_message[:1000] # 限制长度
|
||||
},
|
||||
timeout=10
|
||||
timeout=10,
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
logger.debug(f"[task:{task_id}] Failure reported")
|
||||
@@ -228,7 +280,14 @@ class APIClientV2:
|
||||
payload['fileName'] = file_name
|
||||
|
||||
try:
|
||||
resp = self.session.post(url, json=payload, timeout=10)
|
||||
resp = self._request_with_trace(
|
||||
method="POST",
|
||||
url=url,
|
||||
task_id=task_id,
|
||||
span_name="render.task.api.get_upload_url",
|
||||
json=payload,
|
||||
timeout=10,
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
data = resp.json()
|
||||
if data.get('code') == 200:
|
||||
@@ -256,13 +315,16 @@ class APIClientV2:
|
||||
url = f"{self.base_url}/render/v2/task/{task_id}/extend-lease"
|
||||
|
||||
try:
|
||||
resp = self.session.post(
|
||||
url,
|
||||
resp = self._request_with_trace(
|
||||
method="POST",
|
||||
url=url,
|
||||
task_id=task_id,
|
||||
span_name="render.task.api.extend_lease",
|
||||
params={
|
||||
'workerId': self.worker_id,
|
||||
'extension': extension
|
||||
},
|
||||
timeout=10
|
||||
timeout=10,
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
logger.debug(f"[task:{task_id}] Lease extended by {extension}s")
|
||||
@@ -287,7 +349,13 @@ class APIClientV2:
|
||||
url = f"{self.base_url}/render/v2/task/{task_id}"
|
||||
|
||||
try:
|
||||
resp = self.session.get(url, timeout=10)
|
||||
resp = self._request_with_trace(
|
||||
method="GET",
|
||||
url=url,
|
||||
task_id=task_id,
|
||||
span_name="render.task.api.get_task_info",
|
||||
timeout=10,
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
data = resp.json()
|
||||
if data.get('code') == 200:
|
||||
|
||||
Reference in New Issue
Block a user