feat(storage): 增强文件上传功能并添加详细的指标追踪

- 在存储服务中新增 upload_file_with_metrics 方法,返回上传结果和详细指标
- 为上传操作添加完整的指标收集,包括 HTTP 尝试次数、重试次数、状态码等
- 集成 OpenTelemetry 追踪,记录文件上传的关键属性和错误标记
- 改进缓存写回逻辑,添加缓存写入失败的日志记录
- 为 rclone 上传方式添加指标追踪,并记录其失败后回退到 HTTP 上传的情况
- 优化本地文件大小检查,避免重复的文件系统调用
- 添加更详细的错误日志,包含上传方法、状态码和错误类型信息
This commit is contained in:
2026-02-07 18:29:20 +08:00
parent 16ea45ad1c
commit ef4cf549c4
2 changed files with 113 additions and 10 deletions

View File

@@ -8,7 +8,7 @@
import os
import logging
import subprocess
from typing import Optional
from typing import Any, Dict, Optional, Tuple
from urllib.parse import unquote
import requests
@@ -65,6 +65,22 @@ def _apply_http_replace_map(url: str) -> str:
def upload_file(url: str, file_path: str, max_retries: int = 5, timeout: int = 60) -> bool:
    """Legacy-compatible entry point: report only whether the upload succeeded.

    Forwards every argument by keyword to ``upload_file_with_metrics`` and
    discards the metrics half of the pair it returns.

    Args:
        url: Upload URL, passed through unchanged.
        file_path: Path of the file to upload, passed through unchanged.
        max_retries: Retry limit handed to ``upload_file_with_metrics``.
        timeout: Timeout in seconds handed to ``upload_file_with_metrics``.

    Returns:
        The first element of the pair returned by
        ``upload_file_with_metrics`` (the success flag).
    """
    forwarded_kwargs = {
        "url": url,
        "file_path": file_path,
        "max_retries": max_retries,
        "timeout": timeout,
    }
    # The metrics dict is intentionally dropped; this wrapper keeps the old
    # bool-only contract for existing callers.
    succeeded, _metrics = upload_file_with_metrics(**forwarded_kwargs)
    return succeeded
def upload_file_with_metrics(
url: str,
file_path: str,
max_retries: int = 5,
timeout: int = 60
) -> Tuple[bool, Dict[str, Any]]:
"""
使用签名 URL 上传文件到 OSS
@@ -75,30 +91,54 @@ def upload_file(url: str, file_path: str, max_retries: int = 5, timeout: int = 6
timeout: 超时时间(秒)
Returns:
是否成功
(是否成功, 上传指标)
"""
metrics: Dict[str, Any] = {
"upload_method": "none",
"file_size_bytes": 0,
"content_type": "",
"http_attempts": 0,
"http_retry_count": 0,
"http_status_code": 0,
"http_replace_applied": False,
"rclone_attempted": False,
"rclone_succeeded": False,
"rclone_fallback_http": False,
"error_type": "",
}
if not os.path.exists(file_path):
logger.error(f"File not found: {file_path}")
return False
metrics["error_type"] = "file_not_found"
return False, metrics
file_size = os.path.getsize(file_path)
metrics["file_size_bytes"] = file_size
logger.info(f"Uploading: {file_path} ({file_size} bytes)")
# 检查是否使用 rclone 上传
if os.getenv("UPLOAD_METHOD") == "rclone":
metrics["rclone_attempted"] = True
logger.debug(f"Uploading to: {url}")
result = _upload_with_rclone(url, file_path)
metrics["rclone_succeeded"] = result
if result:
return True
metrics["upload_method"] = "rclone"
return True, metrics
# rclone 失败时回退到 HTTP
metrics["rclone_fallback_http"] = True
# 应用 HTTP_REPLACE_MAP 替换 URL
http_url = _apply_http_replace_map(url)
metrics["http_replace_applied"] = http_url != url
content_type = _get_content_type(file_path)
metrics["content_type"] = content_type
metrics["upload_method"] = "rclone_fallback_http" if metrics["rclone_fallback_http"] else "http"
logger.debug(f"Uploading to: {http_url} (Content-Type: {content_type})")
retries = 0
while retries < max_retries:
metrics["http_attempts"] = retries + 1
try:
with open(file_path, 'rb') as f:
with requests.put(
@@ -108,19 +148,30 @@ def upload_file(url: str, file_path: str, max_retries: int = 5, timeout: int = 6
timeout=timeout,
headers={"Content-Type": content_type}
) as response:
status_code = int(getattr(response, 'status_code', 0) or 0)
metrics["http_status_code"] = status_code
response.raise_for_status()
logger.info(f"Upload succeeded: {file_path}")
return True
metrics["error_type"] = ""
return True, metrics
except requests.exceptions.Timeout:
retries += 1
metrics["http_retry_count"] = retries
metrics["error_type"] = "timeout"
logger.warning(f"Upload timed out. Retrying {retries}/{max_retries}...")
except requests.exceptions.RequestException as e:
retries += 1
metrics["http_retry_count"] = retries
metrics["error_type"] = "request_exception"
response_obj = getattr(e, 'response', None)
status_code = getattr(response_obj, 'status_code', 0) if response_obj is not None else 0
if isinstance(status_code, int) and status_code > 0:
metrics["http_status_code"] = status_code
logger.warning(f"Upload failed ({e}). Retrying {retries}/{max_retries}...")
logger.error(f"Upload failed after {max_retries} retries: {file_path}")
return False
return False, metrics
def _upload_with_rclone(url: str, file_path: str) -> bool: