You've already forked FrameTour-RenderWorker
feat(cache): 增强缓存锁机制支持进程存活检测
- 添加了锁元数据写入和读取功能,记录进程ID和启动时间
- 实现了进程存活检查机制,防止PID复用导致的死锁
- 引入了过期锁检测和自动清理机制
- 集成了psutil库进行系统进程监控
- 优化了缓存清理逻辑,支持跳过活跃锁文件
- 使用JSON格式存储锁元数据信息
This commit is contained in:
@@ -5,6 +5,7 @@
|
|||||||
提供素材下载缓存功能,避免相同素材重复下载。
|
提供素材下载缓存功能,避免相同素材重复下载。
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
@@ -14,6 +15,8 @@ import uuid
|
|||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
from urllib.parse import urlparse, unquote
|
from urllib.parse import urlparse, unquote
|
||||||
|
|
||||||
|
import psutil
|
||||||
|
|
||||||
from services import storage
|
from services import storage
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -62,6 +65,7 @@ class MaterialCache:
|
|||||||
|
|
||||||
LOCK_TIMEOUT_SEC = 30.0
|
LOCK_TIMEOUT_SEC = 30.0
|
||||||
LOCK_POLL_INTERVAL_SEC = 0.1
|
LOCK_POLL_INTERVAL_SEC = 0.1
|
||||||
|
LOCK_STALE_SECONDS = 24 * 60 * 60
|
||||||
|
|
||||||
def __init__(self, cache_dir: str, enabled: bool = True, max_size_gb: float = 0):
|
def __init__(self, cache_dir: str, enabled: bool = True, max_size_gb: float = 0):
|
||||||
"""
|
"""
|
||||||
@@ -100,6 +104,96 @@ class MaterialCache:
|
|||||||
assert self.cache_dir
|
assert self.cache_dir
|
||||||
return os.path.join(self.cache_dir, f"{cache_key}.lock")
|
return os.path.join(self.cache_dir, f"{cache_key}.lock")
|
||||||
|
|
||||||
|
def _write_lock_metadata(self, lock_fd: int, lock_path: str) -> bool:
    """Write owner metadata to the lock file as JSON.

    The metadata records the current PID, the process start time reported
    by psutil (``None`` when it cannot be determined) and the wall-clock
    creation time.

    Args:
        lock_fd: Open, writable descriptor of the lock file. It is always
            consumed: closed by the file object once wrapped, otherwise
            closed here on failure.
        lock_path: Path of the lock file; removal is attempted on failure.

    Returns:
        True when the metadata was written, False when writing failed.
    """
    # Tracks whether a file object has taken over lock_fd. Once it has, the
    # descriptor must not be closed again here: a second os.close() either
    # fails with EBADF or closes an unrelated descriptor that reused the
    # same number in the meantime.
    fd_owned_by_file = False
    try:
        try:
            process_start_time = psutil.Process(os.getpid()).create_time()
        except Exception as e:
            # The start time is optional; the lock stays usable without it.
            process_start_time = None
            logger.warning(f"Cache lock process start time error: {e}")
        metadata = {
            'pid': os.getpid(),
            'process_start_time': process_start_time,
            'created_at': time.time(),
        }
        lock_file = os.fdopen(lock_fd, 'w', encoding='utf-8')
        fd_owned_by_file = True
        with lock_file:
            json.dump(metadata, lock_file)
        return True
    except Exception as e:
        if not fd_owned_by_file:
            # Best-effort close of the raw descriptor that was never wrapped.
            try:
                os.close(lock_fd)
            except Exception:
                pass
        self._remove_lock_file(lock_path, f"write metadata failed: {e}")
        return False
|
||||||
|
|
||||||
|
def _read_lock_metadata(self, lock_path: str) -> Optional[dict]:
|
||||||
|
"""读取锁元数据,失败返回 None(兼容历史空锁文件)"""
|
||||||
|
try:
|
||||||
|
with open(lock_path, 'r', encoding='utf-8') as lock_file:
|
||||||
|
data = json.load(lock_file)
|
||||||
|
return data if isinstance(data, dict) else None
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _is_process_alive(self, pid: int, expected_start_time: Optional[float]) -> bool:
    """Check whether the lock owner process still exists.

    When ``expected_start_time`` is given, the process only counts as alive
    if its psutil ``create_time()`` differs from it by less than 1.0, which
    guards against the PID having been reused by another process. Without
    it, the result of ``is_running()`` is returned.

    Returns False when psutil reports no such process. Any other error is
    logged and answered with True, so an unverifiable owner is treated as
    still alive.
    """
    try:
        owner = psutil.Process(pid)
        if expected_start_time is not None:
            drift = abs(owner.create_time() - expected_start_time)
            return drift < 1.0
        return owner.is_running()
    except psutil.NoSuchProcess:
        return False
    except Exception as e:
        logger.warning(f"Cache lock process check error: {e}")
        return True
|
||||||
|
|
||||||
|
def _is_lock_stale(self, lock_path: str) -> bool:
|
||||||
|
"""判断锁是否过期(进程已退出或超过最大存活时长)"""
|
||||||
|
if not os.path.exists(lock_path):
|
||||||
|
return False
|
||||||
|
now = time.time()
|
||||||
|
metadata = self._read_lock_metadata(lock_path)
|
||||||
|
if metadata:
|
||||||
|
created_at = metadata.get('created_at')
|
||||||
|
if isinstance(created_at, (int, float)) and now - created_at > self.LOCK_STALE_SECONDS:
|
||||||
|
return True
|
||||||
|
pid = metadata.get('pid')
|
||||||
|
pid_value = int(pid) if isinstance(pid, int) or (isinstance(pid, str) and pid.isdigit()) else None
|
||||||
|
expected_start_time = metadata.get('process_start_time')
|
||||||
|
expected_start_time_value = (
|
||||||
|
expected_start_time if isinstance(expected_start_time, (int, float)) else None
|
||||||
|
)
|
||||||
|
if pid_value is not None and not self._is_process_alive(pid_value, expected_start_time_value):
|
||||||
|
return True
|
||||||
|
return self._is_lock_stale_by_mtime(lock_path, now)
|
||||||
|
return self._is_lock_stale_by_mtime(lock_path, now)
|
||||||
|
|
||||||
|
def _is_lock_stale_by_mtime(self, lock_path: str, now: float) -> bool:
|
||||||
|
"""基于文件时间判断锁是否过期"""
|
||||||
|
try:
|
||||||
|
mtime = os.path.getmtime(lock_path)
|
||||||
|
return now - mtime > self.LOCK_STALE_SECONDS
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Cache lock stat error: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _remove_lock_file(self, lock_path: str, reason: str = "") -> bool:
|
||||||
|
"""删除锁文件"""
|
||||||
|
try:
|
||||||
|
os.remove(lock_path)
|
||||||
|
if reason:
|
||||||
|
logger.info(f"Cache lock removed: {lock_path} ({reason})")
|
||||||
|
return True
|
||||||
|
except FileNotFoundError:
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Cache lock remove error: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
def _acquire_lock(self, cache_key: str) -> Optional[str]:
|
def _acquire_lock(self, cache_key: str) -> Optional[str]:
|
||||||
"""获取缓存锁(跨进程安全)"""
|
"""获取缓存锁(跨进程安全)"""
|
||||||
if not self.enabled:
|
if not self.enabled:
|
||||||
@@ -111,9 +205,14 @@ class MaterialCache:
|
|||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
|
fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
|
||||||
os.close(fd)
|
if not self._write_lock_metadata(fd, lock_path):
|
||||||
|
return None
|
||||||
return lock_path
|
return lock_path
|
||||||
except FileExistsError:
|
except FileExistsError:
|
||||||
|
if self._is_lock_stale(lock_path):
|
||||||
|
removed = self._remove_lock_file(lock_path, "stale lock")
|
||||||
|
if removed:
|
||||||
|
continue
|
||||||
if time.monotonic() >= deadline:
|
if time.monotonic() >= deadline:
|
||||||
logger.warning(f"Cache lock timeout: {lock_path}")
|
logger.warning(f"Cache lock timeout: {lock_path}")
|
||||||
return None
|
return None
|
||||||
@@ -126,12 +225,7 @@ class MaterialCache:
|
|||||||
"""释放缓存锁"""
|
"""释放缓存锁"""
|
||||||
if not lock_path:
|
if not lock_path:
|
||||||
return
|
return
|
||||||
try:
|
self._remove_lock_file(lock_path)
|
||||||
os.remove(lock_path)
|
|
||||||
except FileNotFoundError:
|
|
||||||
return
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Cache lock release error: {e}")
|
|
||||||
|
|
||||||
def is_cached(self, url: str) -> Tuple[bool, str]:
|
def is_cached(self, url: str) -> Tuple[bool, str]:
|
||||||
"""
|
"""
|
||||||
@@ -357,9 +451,12 @@ class MaterialCache:
|
|||||||
cache_key = os.path.splitext(filename)[0]
|
cache_key = os.path.splitext(filename)[0]
|
||||||
lock_path = self._get_lock_path(cache_key)
|
lock_path = self._get_lock_path(cache_key)
|
||||||
if os.path.exists(lock_path):
|
if os.path.exists(lock_path):
|
||||||
# 该文件正在被其他任务使用,跳过删除
|
if self._is_lock_stale(lock_path):
|
||||||
logger.debug(f"Cache cleanup: skipping locked file {filename}")
|
self._remove_lock_file(lock_path, "cleanup stale lock")
|
||||||
continue
|
else:
|
||||||
|
# 该文件正在被其他任务使用,跳过删除
|
||||||
|
logger.debug(f"Cache cleanup: skipping locked file {filename}")
|
||||||
|
continue
|
||||||
try:
|
try:
|
||||||
os.remove(file_info['path'])
|
os.remove(file_info['path'])
|
||||||
total_size -= file_info['size']
|
total_size -= file_info['size']
|
||||||
|
|||||||
Reference in New Issue
Block a user