feat(storage): 增强文件上传功能并添加详细的指标追踪

- 在存储服务中新增 upload_file_with_metrics 方法,返回上传结果和详细指标
- 为上传操作添加完整的指标收集,包括 HTTP 尝试次数、重试次数、状态码等
- 集成 OpenTelemetry 追踪,记录文件上传的关键属性和错误标记
- 改进缓存写回逻辑,添加缓存写入失败的日志记录
- 支持 Rclone 上传方式的指标追踪,并记录回退到 HTTP 的情况
- 优化本地文件大小检查,避免重复的文件系统调用
- 添加更详细的错误日志,包含上传方法、状态码和错误类型信息
This commit is contained in:
2026-02-07 18:29:20 +08:00
parent 16ea45ad1c
commit ef4cf549c4
2 changed files with 113 additions and 10 deletions

View File

@@ -734,16 +734,22 @@ class BaseHandler(TaskHandler, ABC):
Returns:
访问 URL,失败返回 None
"""
local_file_exists = os.path.exists(file_path)
local_file_size = os.path.getsize(file_path) if local_file_exists else 0
with start_span(
"render.task.file.upload",
kind=SpanKind.CLIENT,
attributes={
"render.file.type": file_type,
"render.file.path": file_path,
"render.file.timeout_seconds": self.config.upload_timeout,
"render.file.local_exists": local_file_exists,
"render.file.local_size_bytes": local_file_size,
},
) as span:
upload_info = self.api_client.get_upload_url(task_id, file_type, file_name)
if not upload_info:
mark_span_error(span, "get upload url failed", ErrorCode.E_UPLOAD_FAILED.value)
logger.error(f"[task:{task_id}] Failed to get upload URL")
return None
@@ -751,6 +757,7 @@ class BaseHandler(TaskHandler, ABC):
access_url = upload_info.get('accessUrl')
if not upload_url:
mark_span_error(span, "invalid upload url response", ErrorCode.E_UPLOAD_FAILED.value)
logger.error(f"[task:{task_id}] Invalid upload URL response")
return None
@@ -763,9 +770,40 @@ class BaseHandler(TaskHandler, ABC):
span.set_attribute("render.file.access_url", access_url)
try:
result = storage.upload_file(upload_url, file_path, timeout=self.config.upload_timeout)
result, upload_metrics = storage.upload_file_with_metrics(
upload_url,
file_path,
timeout=self.config.upload_timeout,
)
upload_method = str(upload_metrics.get("upload_method", "unknown"))
http_attempts = int(upload_metrics.get("http_attempts", 0))
http_retry_count = int(upload_metrics.get("http_retry_count", 0))
http_status_code = int(upload_metrics.get("http_status_code", 0))
http_replace_applied = bool(upload_metrics.get("http_replace_applied", False))
content_type = str(upload_metrics.get("content_type", ""))
error_type = str(upload_metrics.get("error_type", ""))
rclone_attempted = bool(upload_metrics.get("rclone_attempted", False))
rclone_succeeded = bool(upload_metrics.get("rclone_succeeded", False))
rclone_fallback_http = bool(upload_metrics.get("rclone_fallback_http", False))
if span is not None:
span.set_attribute("render.file.upload_success", bool(result))
span.set_attribute("render.file.upload_method", upload_method)
span.set_attribute("render.file.http_attempts", http_attempts)
span.set_attribute("render.file.http_retry_count", http_retry_count)
span.set_attribute("render.file.http_replace_applied", http_replace_applied)
span.set_attribute("render.file.rclone_attempted", rclone_attempted)
span.set_attribute("render.file.rclone_succeeded", rclone_succeeded)
span.set_attribute("render.file.rclone_fallback_http", rclone_fallback_http)
if content_type:
span.set_attribute("render.file.content_type", content_type)
if http_status_code > 0:
span.set_attribute("render.file.http_status_code", http_status_code)
if error_type:
span.set_attribute("render.file.error_type", error_type)
if result:
file_size = os.path.getsize(file_path)
file_size = local_file_size if local_file_size > 0 else os.path.getsize(file_path)
logger.info(
f"[task:{task_id}] Uploaded: {file_path} ({file_size} bytes)"
)
@@ -773,12 +811,26 @@ class BaseHandler(TaskHandler, ABC):
if span is not None:
span.set_attribute("render.file.size_bytes", file_size)
cache_write_back = "skipped"
if access_url:
self.material_cache.add_to_cache(access_url, file_path)
cache_added = self.material_cache.add_to_cache(access_url, file_path)
cache_write_back = "success" if cache_added else "failed"
if not cache_added:
logger.warning(f"[task:{task_id}] Upload cache write back failed: {file_path}")
if span is not None:
span.set_attribute("render.file.cache_write_back", cache_write_back)
return access_url
logger.error(f"[task:{task_id}] Upload failed: {file_path}")
mark_span_error(
span,
f"upload failed(method={upload_method}, status={http_status_code}, retries={http_retry_count}, error={error_type})",
ErrorCode.E_UPLOAD_FAILED.value
)
logger.error(
f"[task:{task_id}] Upload failed: {file_path}, method={upload_method}, "
f"http_status={http_status_code}, retries={http_retry_count}, error_type={error_type}"
)
return None
except Exception as e:
mark_span_error(span, str(e), ErrorCode.E_UPLOAD_FAILED.value)