You've already forked FrameTour-RenderWorker
perf(cache): 优化缓存下载逻辑并添加性能指标追踪
- 实现了带等待时间统计的缓存锁获取功能
- 新增 get_or_download_with_metrics 方法返回详细的性能指标
- 在 tracing span 中记录锁等待时间、锁获取状态和缓存路径使用情况
- 优化缓存命中路径避免不必要的锁获取操作
- 添加了缓存文件就绪检查和复制功能的独立方法
- 增加了针对缓存锁超时但仍可使用就绪缓存的处理逻辑
- 新增了多个单元测试验证缓存锁定和指标报告功能
This commit is contained in:
@@ -682,10 +682,26 @@ class BaseHandler(TaskHandler, ABC):
|
|||||||
},
|
},
|
||||||
) as span:
|
) as span:
|
||||||
try:
|
try:
|
||||||
|
lock_wait_ms = 0
|
||||||
|
lock_acquired = False
|
||||||
|
cache_path_used = "unknown"
|
||||||
if use_cache:
|
if use_cache:
|
||||||
result = self.material_cache.get_or_download(url, dest, timeout=timeout)
|
result, cache_metrics = self.material_cache.get_or_download_with_metrics(
|
||||||
|
url,
|
||||||
|
dest,
|
||||||
|
timeout=timeout
|
||||||
|
)
|
||||||
|
lock_wait_ms = int(cache_metrics.get("lock_wait_ms", 0))
|
||||||
|
lock_acquired = bool(cache_metrics.get("lock_acquired", False))
|
||||||
|
cache_path_used = str(cache_metrics.get("cache_path_used", "unknown"))
|
||||||
else:
|
else:
|
||||||
result = storage.download_file(url, dest, timeout=timeout)
|
result = storage.download_file(url, dest, timeout=timeout)
|
||||||
|
cache_path_used = "direct"
|
||||||
|
|
||||||
|
if span is not None:
|
||||||
|
span.set_attribute("render.file.lock_wait_ms", lock_wait_ms)
|
||||||
|
span.set_attribute("render.file.lock_acquired", lock_acquired)
|
||||||
|
span.set_attribute("render.file.cache_path_used", cache_path_used)
|
||||||
|
|
||||||
if result:
|
if result:
|
||||||
file_size = os.path.getsize(dest) if os.path.exists(dest) else 0
|
file_size = os.path.getsize(dest) if os.path.exists(dest) else 0
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ import logging
|
|||||||
import shutil
|
import shutil
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
from typing import Optional, Tuple
|
from typing import Any, Dict, Optional, Tuple
|
||||||
from urllib.parse import urlparse, unquote
|
from urllib.parse import urlparse, unquote
|
||||||
|
|
||||||
import psutil
|
import psutil
|
||||||
@@ -66,6 +66,7 @@ class MaterialCache:
|
|||||||
LOCK_TIMEOUT_SEC = 30.0
|
LOCK_TIMEOUT_SEC = 30.0
|
||||||
LOCK_POLL_INTERVAL_SEC = 0.1
|
LOCK_POLL_INTERVAL_SEC = 0.1
|
||||||
LOCK_STALE_SECONDS = 24 * 60 * 60
|
LOCK_STALE_SECONDS = 24 * 60 * 60
|
||||||
|
DOWNLOAD_LOCK_TIMEOUT_SEC = 5.0
|
||||||
|
|
||||||
def __init__(self, cache_dir: str, enabled: bool = True, max_size_gb: float = 0):
|
def __init__(self, cache_dir: str, enabled: bool = True, max_size_gb: float = 0):
|
||||||
"""
|
"""
|
||||||
@@ -194,13 +195,14 @@ class MaterialCache:
|
|||||||
logger.warning(f"Cache lock remove error: {e}")
|
logger.warning(f"Cache lock remove error: {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _acquire_lock(self, cache_key: str) -> Optional[str]:
|
def _acquire_lock(self, cache_key: str, timeout_sec: Optional[float] = None) -> Optional[str]:
|
||||||
"""获取缓存锁(跨进程安全)"""
|
"""获取缓存锁(跨进程安全)"""
|
||||||
if not self.enabled:
|
if not self.enabled:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
wait_timeout_sec = self.LOCK_TIMEOUT_SEC if timeout_sec is None else max(float(timeout_sec), 0.0)
|
||||||
lock_path = self._get_lock_path(cache_key)
|
lock_path = self._get_lock_path(cache_key)
|
||||||
deadline = time.monotonic() + self.LOCK_TIMEOUT_SEC
|
deadline = time.monotonic() + wait_timeout_sec
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
@@ -214,13 +216,24 @@ class MaterialCache:
|
|||||||
if removed:
|
if removed:
|
||||||
continue
|
continue
|
||||||
if time.monotonic() >= deadline:
|
if time.monotonic() >= deadline:
|
||||||
logger.warning(f"Cache lock timeout: {lock_path}")
|
logger.warning(f"Cache lock timeout ({wait_timeout_sec:.1f}s): {lock_path}")
|
||||||
return None
|
return None
|
||||||
time.sleep(self.LOCK_POLL_INTERVAL_SEC)
|
time.sleep(self.LOCK_POLL_INTERVAL_SEC)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Cache lock error: {e}")
|
logger.warning(f"Cache lock error: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _acquire_lock_with_wait(
|
||||||
|
self,
|
||||||
|
cache_key: str,
|
||||||
|
timeout_sec: Optional[float] = None
|
||||||
|
) -> Tuple[Optional[str], int]:
|
||||||
|
"""获取缓存锁并返回等待时长(毫秒)"""
|
||||||
|
start_time = time.monotonic()
|
||||||
|
lock_path = self._acquire_lock(cache_key, timeout_sec=timeout_sec)
|
||||||
|
lock_wait_ms = max(int((time.monotonic() - start_time) * 1000), 0)
|
||||||
|
return lock_path, lock_wait_ms
|
||||||
|
|
||||||
def _release_lock(self, lock_path: Optional[str]) -> None:
|
def _release_lock(self, lock_path: Optional[str]) -> None:
|
||||||
"""释放缓存锁"""
|
"""释放缓存锁"""
|
||||||
if not lock_path:
|
if not lock_path:
|
||||||
@@ -244,6 +257,27 @@ class MaterialCache:
|
|||||||
exists = os.path.exists(cache_path) and os.path.getsize(cache_path) > 0
|
exists = os.path.exists(cache_path) and os.path.getsize(cache_path) > 0
|
||||||
return exists, cache_path
|
return exists, cache_path
|
||||||
|
|
||||||
|
def _is_cache_file_ready(self, cache_path: str) -> bool:
|
||||||
|
"""缓存文件是否已就绪(存在且大小大于 0)"""
|
||||||
|
try:
|
||||||
|
return os.path.exists(cache_path) and os.path.getsize(cache_path) > 0
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _copy_cache_to_dest(self, cache_path: str, dest: str) -> Tuple[bool, int]:
|
||||||
|
"""将缓存文件复制到目标路径并返回结果与文件大小"""
|
||||||
|
try:
|
||||||
|
shutil.copy2(cache_path, dest)
|
||||||
|
try:
|
||||||
|
os.utime(cache_path, None)
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"Failed to update cache access time: {e}")
|
||||||
|
file_size = os.path.getsize(dest) if os.path.exists(dest) else 0
|
||||||
|
return True, file_size
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to copy from cache: {e}")
|
||||||
|
return False, 0
|
||||||
|
|
||||||
def get_or_download(
|
def get_or_download(
|
||||||
self,
|
self,
|
||||||
url: str,
|
url: str,
|
||||||
@@ -251,8 +285,24 @@ class MaterialCache:
|
|||||||
timeout: int = 300,
|
timeout: int = 300,
|
||||||
max_retries: int = 5
|
max_retries: int = 5
|
||||||
) -> bool:
|
) -> bool:
|
||||||
|
"""兼容旧接口:返回下载是否成功。"""
|
||||||
|
result, _ = self.get_or_download_with_metrics(
|
||||||
|
url=url,
|
||||||
|
dest=dest,
|
||||||
|
timeout=timeout,
|
||||||
|
max_retries=max_retries,
|
||||||
|
)
|
||||||
|
return result
|
||||||
|
|
||||||
|
def get_or_download_with_metrics(
|
||||||
|
self,
|
||||||
|
url: str,
|
||||||
|
dest: str,
|
||||||
|
timeout: int = 300,
|
||||||
|
max_retries: int = 5
|
||||||
|
) -> Tuple[bool, Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
从缓存获取素材,若未缓存则下载并缓存
|
从缓存获取素材,若未缓存则下载并缓存,并返回关键指标。
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
url: 素材 URL
|
url: 素材 URL
|
||||||
@@ -261,8 +311,14 @@ class MaterialCache:
|
|||||||
max_retries: 最大重试次数
|
max_retries: 最大重试次数
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
是否成功
|
(是否成功, 指标字典)
|
||||||
"""
|
"""
|
||||||
|
metrics: Dict[str, Any] = {
|
||||||
|
"lock_wait_ms": 0,
|
||||||
|
"lock_acquired": False,
|
||||||
|
"cache_path_used": "unknown",
|
||||||
|
}
|
||||||
|
|
||||||
# 确保目标目录存在
|
# 确保目标目录存在
|
||||||
dest_dir = os.path.dirname(dest)
|
dest_dir = os.path.dirname(dest)
|
||||||
if dest_dir:
|
if dest_dir:
|
||||||
@@ -270,34 +326,49 @@ class MaterialCache:
|
|||||||
|
|
||||||
# 缓存未启用时直接下载
|
# 缓存未启用时直接下载
|
||||||
if not self.enabled:
|
if not self.enabled:
|
||||||
return storage.download_file(url, dest, max_retries=max_retries, timeout=timeout)
|
result = storage.download_file(url, dest, max_retries=max_retries, timeout=timeout)
|
||||||
|
metrics["cache_path_used"] = "direct"
|
||||||
|
return result, metrics
|
||||||
|
|
||||||
cache_key = _extract_cache_key(url)
|
cache_key = _extract_cache_key(url)
|
||||||
lock_path = self._acquire_lock(cache_key)
|
|
||||||
if not lock_path:
|
|
||||||
logger.warning(f"Cache lock unavailable, downloading without cache: {url[:80]}...")
|
|
||||||
return storage.download_file(url, dest, max_retries=max_retries, timeout=timeout)
|
|
||||||
|
|
||||||
try:
|
|
||||||
cache_path = self.get_cache_path(url)
|
cache_path = self.get_cache_path(url)
|
||||||
cached = os.path.exists(cache_path) and os.path.getsize(cache_path) > 0
|
|
||||||
|
|
||||||
if cached:
|
def _try_serve_from_cache(log_prefix: str, delete_on_failure: bool = False) -> bool:
|
||||||
# 命中缓存,复制到目标路径
|
if not self._is_cache_file_ready(cache_path):
|
||||||
try:
|
return False
|
||||||
shutil.copy2(cache_path, dest)
|
copied, file_size = self._copy_cache_to_dest(cache_path, dest)
|
||||||
# 更新访问时间(用于 LRU 清理)
|
if copied:
|
||||||
os.utime(cache_path, None)
|
metrics["cache_path_used"] = "cache"
|
||||||
file_size = os.path.getsize(dest)
|
logger.info(f"{log_prefix}: {url[:80]}... -> {dest} ({file_size} bytes)")
|
||||||
logger.info(f"Cache hit: {url[:80]}... -> {dest} ({file_size} bytes)")
|
|
||||||
return True
|
return True
|
||||||
except Exception as e:
|
if delete_on_failure:
|
||||||
logger.warning(f"Failed to copy from cache: {e}, will re-download")
|
|
||||||
# 缓存复制失败,删除可能损坏的缓存文件
|
|
||||||
try:
|
try:
|
||||||
os.remove(cache_path)
|
os.remove(cache_path)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
return False
|
||||||
|
|
||||||
|
if _try_serve_from_cache("Cache hit"):
|
||||||
|
return True, metrics
|
||||||
|
|
||||||
|
lock_path, lock_wait_ms = self._acquire_lock_with_wait(
|
||||||
|
cache_key,
|
||||||
|
timeout_sec=self.DOWNLOAD_LOCK_TIMEOUT_SEC,
|
||||||
|
)
|
||||||
|
metrics["lock_wait_ms"] = lock_wait_ms
|
||||||
|
if not lock_path:
|
||||||
|
if _try_serve_from_cache("Cache hit after lock timeout"):
|
||||||
|
return True, metrics
|
||||||
|
logger.warning(f"Cache lock unavailable, downloading without cache: {url[:80]}...")
|
||||||
|
result = storage.download_file(url, dest, max_retries=max_retries, timeout=timeout)
|
||||||
|
metrics["cache_path_used"] = "direct"
|
||||||
|
return result, metrics
|
||||||
|
|
||||||
|
metrics["lock_acquired"] = True
|
||||||
|
|
||||||
|
try:
|
||||||
|
if _try_serve_from_cache("Cache hit", delete_on_failure=True):
|
||||||
|
return True, metrics
|
||||||
|
|
||||||
# 未命中缓存,下载到缓存目录
|
# 未命中缓存,下载到缓存目录
|
||||||
logger.debug(f"Cache miss: {url[:80]}...")
|
logger.debug(f"Cache miss: {url[:80]}...")
|
||||||
@@ -312,26 +383,25 @@ class MaterialCache:
|
|||||||
# 下载失败,清理临时文件
|
# 下载失败,清理临时文件
|
||||||
if os.path.exists(temp_cache_path):
|
if os.path.exists(temp_cache_path):
|
||||||
os.remove(temp_cache_path)
|
os.remove(temp_cache_path)
|
||||||
return False
|
return False, metrics
|
||||||
|
|
||||||
if not os.path.exists(temp_cache_path) or os.path.getsize(temp_cache_path) <= 0:
|
if not os.path.exists(temp_cache_path) or os.path.getsize(temp_cache_path) <= 0:
|
||||||
if os.path.exists(temp_cache_path):
|
if os.path.exists(temp_cache_path):
|
||||||
os.remove(temp_cache_path)
|
os.remove(temp_cache_path)
|
||||||
return False
|
return False, metrics
|
||||||
|
|
||||||
# 下载成功,原子替换缓存文件
|
# 下载成功,原子替换缓存文件
|
||||||
os.replace(temp_cache_path, cache_path)
|
os.replace(temp_cache_path, cache_path)
|
||||||
|
|
||||||
# 复制到目标路径
|
# 复制到目标路径
|
||||||
shutil.copy2(cache_path, dest)
|
if not _try_serve_from_cache("Downloaded and cached", delete_on_failure=False):
|
||||||
file_size = os.path.getsize(dest)
|
return False, metrics
|
||||||
logger.info(f"Downloaded and cached: {url[:80]}... ({file_size} bytes)")
|
|
||||||
|
|
||||||
# 检查是否需要清理缓存
|
# 检查是否需要清理缓存
|
||||||
if self.max_size_bytes > 0:
|
if self.max_size_bytes > 0:
|
||||||
self._cleanup_if_needed()
|
self._cleanup_if_needed()
|
||||||
|
|
||||||
return True
|
return True, metrics
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Cache download error: {e}")
|
logger.error(f"Cache download error: {e}")
|
||||||
@@ -341,7 +411,7 @@ class MaterialCache:
|
|||||||
os.remove(temp_cache_path)
|
os.remove(temp_cache_path)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
return False
|
return False, metrics
|
||||||
finally:
|
finally:
|
||||||
self._release_lock(lock_path)
|
self._release_lock(lock_path)
|
||||||
|
|
||||||
|
|||||||
@@ -13,3 +13,89 @@ def test_cache_lock_acquire_release(tmp_path):
|
|||||||
assert os.path.exists(lock_path)
|
assert os.path.exists(lock_path)
|
||||||
cache._release_lock(lock_path)
|
cache._release_lock(lock_path)
|
||||||
assert not os.path.exists(lock_path)
|
assert not os.path.exists(lock_path)
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_or_download_cache_hit_does_not_wait_lock(tmp_path, monkeypatch):
    """A ready cache file is served without ever calling ``_acquire_lock``."""
    cache = MaterialCache(cache_dir=str(tmp_path), enabled=True, max_size_gb=0)
    url = "https://example.com/path/video.mp4?token=abc"

    # Pre-populate the cache entry for this URL.
    cached_file = cache.get_cache_path(url)
    with open(cached_file, 'wb') as handle:
        handle.write(b'cached-data')
    target = tmp_path / "result.bin"

    def _fail_if_locked(*args, **kwargs):
        # Any lock attempt means the fast path was not taken.
        raise AssertionError("cache hit path should not acquire lock")

    monkeypatch.setattr(cache, "_acquire_lock", _fail_if_locked)

    outcome = cache.get_or_download(url, str(target), timeout=1)
    assert outcome is True
    assert target.read_bytes() == b'cached-data'
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_or_download_lock_timeout_can_still_use_ready_cache(tmp_path, monkeypatch):
    """With the lock unobtainable, a ready cache file is used and no download runs."""
    cache = MaterialCache(cache_dir=str(tmp_path), enabled=True, max_size_gb=0)
    url = "https://example.com/path/audio.aac?token=abc"

    # Pre-populate the cache entry for this URL.
    cached_file = cache.get_cache_path(url)
    with open(cached_file, 'wb') as handle:
        handle.write(b'audio-cache')
    target = tmp_path / "audio.aac"
    download_state = {"value": False}

    def _never_acquire(*args, **kwargs):
        # Simulate a lock that cannot be obtained.
        return None

    def _recording_download(*args, **kwargs):
        # Record the call and report failure, so success can only come from the cache.
        download_state["value"] = True
        return False

    monkeypatch.setattr(cache, "_acquire_lock", _never_acquire)
    monkeypatch.setattr("services.cache.storage.download_file", _recording_download)

    outcome = cache.get_or_download(url, str(target), timeout=1)
    assert outcome is True
    assert target.read_bytes() == b'audio-cache'
    assert download_state["value"] is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_or_download_uses_short_lock_timeout(tmp_path, monkeypatch):
    """On a cache miss the lock is requested with ``DOWNLOAD_LOCK_TIMEOUT_SEC``."""
    cache = MaterialCache(cache_dir=str(tmp_path), enabled=True, max_size_gb=0)
    url = "https://example.com/path/segment.ts?token=abc"
    target = tmp_path / "segment.ts"
    observed = {"timeout_sec": None}

    def _capturing_acquire(cache_key, timeout_sec=None):
        # Remember the requested timeout and refuse the lock.
        observed["timeout_sec"] = timeout_sec
        return None

    def _always_succeeds(*args, **kwargs):
        # Stand-in for the network download.
        return True

    monkeypatch.setattr(cache, "_acquire_lock", _capturing_acquire)
    monkeypatch.setattr("services.cache.storage.download_file", _always_succeeds)

    outcome = cache.get_or_download(url, str(target), timeout=1)
    assert outcome is True
    assert observed["timeout_sec"] == cache.DOWNLOAD_LOCK_TIMEOUT_SEC
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_or_download_with_metrics_cache_hit_wait_zero(tmp_path):
    """A cache hit reports zero lock wait, no lock acquired, and the "cache" path."""
    cache = MaterialCache(cache_dir=str(tmp_path), enabled=True, max_size_gb=0)
    url = "https://example.com/path/hit.mp4?token=abc"

    # Pre-populate the cache entry for this URL.
    cached_file = cache.get_cache_path(url)
    with open(cached_file, 'wb') as handle:
        handle.write(b'hit-data')
    target = tmp_path / "hit.mp4"

    outcome = cache.get_or_download_with_metrics(url, str(target), timeout=1)
    success, metrics = outcome

    assert success is True
    assert metrics["lock_wait_ms"] == 0
    assert metrics["lock_acquired"] is False
    assert metrics["cache_path_used"] == "cache"
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_or_download_with_metrics_reports_lock_wait_ms(tmp_path, monkeypatch):
    """The wait time from ``_acquire_lock_with_wait`` is surfaced in the metrics."""
    cache = MaterialCache(cache_dir=str(tmp_path), enabled=True, max_size_gb=0)
    url = "https://example.com/path/miss.mp4?token=abc"
    target = tmp_path / "miss.mp4"

    def _lock_denied_after_wait(*args, **kwargs):
        # No lock obtained, with a recognisable wait value.
        return (None, 4321)

    def _always_succeeds(*args, **kwargs):
        # Stand-in for the network download.
        return True

    monkeypatch.setattr(cache, "_acquire_lock_with_wait", _lock_denied_after_wait)
    monkeypatch.setattr("services.cache.storage.download_file", _always_succeeds)

    outcome = cache.get_or_download_with_metrics(url, str(target), timeout=1)
    success, metrics = outcome

    assert success is True
    assert metrics["lock_wait_ms"] == 4321
    assert metrics["lock_acquired"] is False
    assert metrics["cache_path_used"] == "direct"
|
||||||
|
|||||||
Reference in New Issue
Block a user