feat(material-cache): 添加缓存锁机制防止并发冲突

- 实现跨进程缓存锁获取和释放功能
- 在下载过程中使用UUID生成唯一的临时文件名避免并发覆盖
- 添加超时机制和轮询间隔控制锁等待时间
- 修改清理逻辑跳过锁文件和下载中的临时文件
- 添加测试验证缓存锁功能正常工作

fix(ffmpeg): 优化FFmpeg命令执行和错误处理

- 添加默认日志级别为error减少冗余输出
- 修复subprocess运行参数传递方式
- 改进错误信息截取避免空值解码异常

refactor(system-info): 优化系统信息获取和缓存机制

- 实现FFmpeg版本、编解码器信息缓存避免重复查询
- 添加系统信息TTL缓存机制提升性能
- 实现GPU信息检查状态缓存避免重复检测
- 整合静态系统信息和动态信息分离处理

refactor(storage): 优化HTTP上传下载资源管理

- 使用上下文管理器确保请求连接正确关闭
- 修改rclone命令构建方式从字符串改为列表形式
- 改进错误处理截取stderr输出长度限制
- 优化响应处理避免资源泄露
2026-01-19 20:03:18 +08:00
parent 0cc96a968b
commit b291f33486
6 changed files with 238 additions and 96 deletions
--- a/services/cache.py
+++ b/services/cache.py
@@ -10,6 +10,7 @@ import hashlib
 import logging
 import shutil
 import time
+import uuid
 from typing import Optional, Tuple
 from urllib.parse import urlparse, unquote

@@ -59,6 +60,9 @@ class MaterialCache:
    负责素材文件的缓存存储和检索。
    """

+    LOCK_TIMEOUT_SEC = 30.0
+    LOCK_POLL_INTERVAL_SEC = 0.1
+
    def __init__(self, cache_dir: str, enabled: bool = True, max_size_gb: float = 0):
        """
        初始化缓存管理器
@@ -91,6 +95,44 @@ class MaterialCache:
        filename = f"{cache_key}{ext}"
        return os.path.join(self.cache_dir, filename)

+    def _get_lock_path(self, cache_key: str) -> str:
+        """Return the lock-file path for ``cache_key``: ``<cache_dir>/<cache_key>.lock``."""
+        assert self.cache_dir  # a lock path can only be built when cache_dir is set (non-empty)
+        return os.path.join(self.cache_dir, f"{cache_key}.lock")
+
+    def _acquire_lock(self, cache_key: str) -> Optional[str]:
+        """Acquire the lock file for ``cache_key``; return its path, or None if disabled, timed out, or on error."""
+        if not self.enabled:
+            return None
+
+        lock_path = self._get_lock_path(cache_key)
+        deadline = time.monotonic() + self.LOCK_TIMEOUT_SEC  # give up once the monotonic clock passes this
+
+        while True:
+            try:
+                fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)  # O_EXCL: fail if the file exists
+                os.close(fd)  # nothing is written; the file's existence is the lock
+                return lock_path
+            except FileExistsError:
+                if time.monotonic() >= deadline:
+                    logger.warning(f"Cache lock timeout: {lock_path}")
+                    return None
+                time.sleep(self.LOCK_POLL_INTERVAL_SEC)  # lock file still present; wait, then retry
+            except Exception as e:
+                logger.warning(f"Cache lock error: {e}")
+                return None
+
+    def _release_lock(self, lock_path: Optional[str]) -> None:
+        """Release a lock by deleting its lock file; a falsy ``lock_path`` is a no-op."""
+        if not lock_path:
+            return
+        try:
+            os.remove(lock_path)
+        except FileNotFoundError:
+            return  # lock file is already gone; nothing to release
+        except Exception as e:
+            logger.warning(f"Cache lock release error: {e}")  # logged only; the error is not re-raised
+
    def is_cached(self, url: str) -> Tuple[bool, str]:
        """
        检查素材是否已缓存
@@ -136,63 +178,78 @@ class MaterialCache:
        if not self.enabled:
            return storage.download_file(url, dest, max_retries=max_retries, timeout=timeout)

-        # 检查缓存
-        cached, cache_path = self.is_cached(url)
+        cache_key = _extract_cache_key(url)
+        lock_path = self._acquire_lock(cache_key)
+        if not lock_path:
+            logger.warning(f"Cache lock unavailable, downloading without cache: {url[:80]}...")
+            return storage.download_file(url, dest, max_retries=max_retries, timeout=timeout)

-        if cached:
-            # 命中缓存，复制到目标路径
-            try:
-                shutil.copy2(cache_path, dest)
-                # 更新访问时间（用于 LRU 清理）
-                os.utime(cache_path, None)
-                file_size = os.path.getsize(dest)
-                logger.info(f"Cache hit: {url[:80]}... -> {dest} ({file_size} bytes)")
-                return True
-            except Exception as e:
-                logger.warning(f"Failed to copy from cache: {e}, will re-download")
-                # 缓存复制失败，删除可能损坏的缓存文件
-                try:
-                    os.remove(cache_path)
-                except Exception:
-                    pass
-
-        # 未命中缓存，下载到缓存目录
-        logger.debug(f"Cache miss: {url[:80]}...")
-
-        # 先下载到临时文件
-        temp_cache_path = cache_path + '.downloading'
        try:
-            if not storage.download_file(url, temp_cache_path, max_retries=max_retries, timeout=timeout):
-                # 下载失败，清理临时文件
-                if os.path.exists(temp_cache_path):
-                    os.remove(temp_cache_path)
-                return False
+            cache_path = self.get_cache_path(url)
+            cached = os.path.exists(cache_path) and os.path.getsize(cache_path) > 0

-            # 下载成功，移动到正式缓存路径
-            if os.path.exists(cache_path):
-                os.remove(cache_path)
-            os.rename(temp_cache_path, cache_path)
-
-            # 复制到目标路径
-            shutil.copy2(cache_path, dest)
-            file_size = os.path.getsize(dest)
-            logger.info(f"Downloaded and cached: {url[:80]}... ({file_size} bytes)")
-
-            # 检查是否需要清理缓存
-            if self.max_size_bytes > 0:
-                self._cleanup_if_needed()
-
-            return True
-
-        except Exception as e:
-            logger.error(f"Cache download error: {e}")
-            # 清理临时文件
-            if os.path.exists(temp_cache_path):
+            if cached:
+                # 命中缓存，复制到目标路径
                try:
-                    os.remove(temp_cache_path)
-                except Exception:
-                    pass
-            return False
+                    shutil.copy2(cache_path, dest)
+                    # 更新访问时间（用于 LRU 清理）
+                    os.utime(cache_path, None)
+                    file_size = os.path.getsize(dest)
+                    logger.info(f"Cache hit: {url[:80]}... -> {dest} ({file_size} bytes)")
+                    return True
+                except Exception as e:
+                    logger.warning(f"Failed to copy from cache: {e}, will re-download")
+                    # 缓存复制失败，删除可能损坏的缓存文件
+                    try:
+                        os.remove(cache_path)
+                    except Exception:
+                        pass
+
+            # 未命中缓存，下载到缓存目录
+            logger.debug(f"Cache miss: {url[:80]}...")
+
+            # 先下载到临时文件（唯一文件名，避免并发覆盖）
+            temp_cache_path = os.path.join(
+                self.cache_dir,
+                f"{cache_key}.{uuid.uuid4().hex}.downloading"
+            )
+            try:
+                if not storage.download_file(url, temp_cache_path, max_retries=max_retries, timeout=timeout):
+                    # 下载失败，清理临时文件
+                    if os.path.exists(temp_cache_path):
+                        os.remove(temp_cache_path)
+                    return False
+
+                if not os.path.exists(temp_cache_path) or os.path.getsize(temp_cache_path) <= 0:
+                    if os.path.exists(temp_cache_path):
+                        os.remove(temp_cache_path)
+                    return False
+
+                # 下载成功，原子替换缓存文件
+                os.replace(temp_cache_path, cache_path)
+
+                # 复制到目标路径
+                shutil.copy2(cache_path, dest)
+                file_size = os.path.getsize(dest)
+                logger.info(f"Downloaded and cached: {url[:80]}... ({file_size} bytes)")
+
+                # 检查是否需要清理缓存
+                if self.max_size_bytes > 0:
+                    self._cleanup_if_needed()
+
+                return True
+
+            except Exception as e:
+                logger.error(f"Cache download error: {e}")
+                # 清理临时文件
+                if os.path.exists(temp_cache_path):
+                    try:
+                        os.remove(temp_cache_path)
+                    except Exception:
+                        pass
+                return False
+        finally:
+            self._release_lock(lock_path)

    def _cleanup_if_needed(self) -> None:
        """
@@ -209,7 +266,7 @@ class MaterialCache:
            total_size = 0

            for filename in os.listdir(self.cache_dir):
-                if filename.endswith('.downloading'):
+                if filename.endswith('.downloading') or filename.endswith('.lock'):
                    continue
                file_path = os.path.join(self.cache_dir, filename)
                if os.path.isfile(file_path):
@@ -275,7 +332,7 @@ class MaterialCache:
        total_size = 0

        for filename in os.listdir(self.cache_dir):
-            if filename.endswith('.downloading'):
+            if filename.endswith('.downloading') or filename.endswith('.lock'):
                continue
            file_path = os.path.join(self.cache_dir, filename)
            if os.path.isfile(file_path):