- Implement cache-lock acquisition with wait-time measurement
- Add a get_or_download_with_metrics method that returns detailed performance metrics
- Record lock wait time, lock-acquisition status, and the cache path used in the tracing span
- Optimize the cache-hit path to avoid unnecessary lock acquisition
- Split cache-file readiness checking and cache-to-destination copying into standalone methods
- Handle the case where the cache lock times out but a ready cache file can still be served
- Add unit tests covering cache locking and metrics reporting
# -*- coding: utf-8 -*-
"""
Material cache service

Provides download caching for material files so that identical materials
are not downloaded repeatedly.
"""

import json
import os
import hashlib
import logging
import shutil
import time
import uuid
from typing import Any, Dict, Optional, Tuple
from urllib.parse import urlparse, unquote

import psutil

from services import storage

logger = logging.getLogger(__name__)


def _extract_cache_key(url: str) -> str:
    """
    Derive a cache key from a URL.

    Strips signature and other query parameters, keeping the path as the
    unique identifier.

    Args:
        url: full material URL

    Returns:
        Cache key (MD5 hash of the URL path)
    """
    parsed = urlparse(url)
    # Use scheme + host + path as the identity (ignore signatures and other query params)
    cache_key_source = f"{parsed.scheme}://{parsed.netloc}{unquote(parsed.path)}"
    return hashlib.md5(cache_key_source.encode('utf-8')).hexdigest()
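
# Example (hypothetical URLs): two signed URLs for the same object yield the
# same cache key, because only scheme, host and path feed the hash:
#     _extract_cache_key("https://cdn.example.com/v/a.mp4?sig=AAA")
#     _extract_cache_key("https://cdn.example.com/v/a.mp4?sig=BBB")
# Both calls return the same MD5 hex digest.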


def _get_file_extension(url: str) -> str:
    """
    Extract the file extension from a URL.

    Args:
        url: material URL

    Returns:
        File extension (e.g. .mp4, .png), or an empty string if none is found
    """
    parsed = urlparse(url)
    path = unquote(parsed.path)
    _, ext = os.path.splitext(path)
    return ext.lower() if ext else ''
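
# Example (hypothetical URL): _get_file_extension("https://cdn.example.com/v/A.MP4?sig=x")
# returns ".mp4"; the query string is ignored and the extension is lower-cased.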


class MaterialCache:
    """
    Material cache manager.

    Responsible for storing and retrieving cached material files.
    """

    LOCK_TIMEOUT_SEC = 30.0             # default maximum wait when acquiring a lock
    LOCK_POLL_INTERVAL_SEC = 0.1        # polling interval while waiting for a lock
    LOCK_STALE_SECONDS = 24 * 60 * 60   # a lock older than this is treated as stale
    DOWNLOAD_LOCK_TIMEOUT_SEC = 5.0     # shorter wait used on the download path

    def __init__(self, cache_dir: str, enabled: bool = True, max_size_gb: float = 0):
        """
        Initialize the cache manager.

        Args:
            cache_dir: cache directory path
            enabled: whether caching is enabled
            max_size_gb: maximum cache size in GB; 0 means unlimited
        """
        self.cache_dir = cache_dir
        self.enabled = enabled
        self.max_size_bytes = int(max_size_gb * 1024 * 1024 * 1024) if max_size_gb > 0 else 0

        if self.enabled:
            os.makedirs(self.cache_dir, exist_ok=True)
            logger.info(f"Material cache initialized: {cache_dir}")
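
    # Construction example (hypothetical settings): a 10 GB LRU-bounded cache:
    #     cache = MaterialCache("/data/material_cache", enabled=True, max_size_gb=10)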

    def get_cache_path(self, url: str) -> str:
        """
        Get the cache file path for a material.

        Args:
            url: material URL

        Returns:
            Full path of the cache file
        """
        cache_key = _extract_cache_key(url)
        ext = _get_file_extension(url)
        filename = f"{cache_key}{ext}"
        return os.path.join(self.cache_dir, filename)

    def _get_lock_path(self, cache_key: str) -> str:
        """Get the lock file path for a cache key."""
        assert self.cache_dir
        return os.path.join(self.cache_dir, f"{cache_key}.lock")

    def _write_lock_metadata(self, lock_fd: int, lock_path: str) -> bool:
        """Write lock metadata; on failure, clean up the lock file."""
        try:
            try:
                process_start_time = psutil.Process(os.getpid()).create_time()
            except Exception as e:
                process_start_time = None
                logger.warning(f"Cache lock process start time error: {e}")
            metadata = {
                'pid': os.getpid(),
                'process_start_time': process_start_time,
                'created_at': time.time()
            }
            with os.fdopen(lock_fd, 'w', encoding='utf-8') as lock_file:
                json.dump(metadata, lock_file)
            return True
        except Exception as e:
            try:
                os.close(lock_fd)
            except Exception:
                pass
            self._remove_lock_file(lock_path, f"write metadata failed: {e}")
            return False

    def _read_lock_metadata(self, lock_path: str) -> Optional[dict]:
        """Read lock metadata; return None on failure (tolerates legacy empty lock files)."""
        try:
            with open(lock_path, 'r', encoding='utf-8') as lock_file:
                data = json.load(lock_file)
                return data if isinstance(data, dict) else None
        except Exception:
            return None
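
    # A lock file written by _write_lock_metadata looks roughly like this
    # (illustrative values):
    #     {"pid": 4242, "process_start_time": 1717000000.0, "created_at": 1717000123.4}
    # The pid / process_start_time pair lets the staleness check detect PID reuse
    # after a crash, and created_at bounds the lock's total lifetime.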

    def _is_process_alive(self, pid: int, expected_start_time: Optional[float]) -> bool:
        """Check whether the process is alive, validating its start time (guards against PID reuse)."""
        try:
            process = psutil.Process(pid)
            if expected_start_time is None:
                return process.is_running()
            actual_start_time = process.create_time()
            return abs(actual_start_time - expected_start_time) < 1.0
        except psutil.NoSuchProcess:
            return False
        except Exception as e:
            logger.warning(f"Cache lock process check error: {e}")
            return True
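
    # The start-time comparison covers the PID-reuse case: if a worker crashes
    # while holding a lock and the OS later assigns the same PID to an unrelated
    # process, that process's create_time() differs from the recorded value by
    # far more than the 1-second tolerance, so the stale lock is still reclaimed.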

    def _is_lock_stale(self, lock_path: str) -> bool:
        """Check whether a lock is stale (owner process exited, or lock exceeded its max lifetime)."""
        if not os.path.exists(lock_path):
            return False
        now = time.time()
        metadata = self._read_lock_metadata(lock_path)
        if metadata:
            created_at = metadata.get('created_at')
            if isinstance(created_at, (int, float)) and now - created_at > self.LOCK_STALE_SECONDS:
                return True
            pid = metadata.get('pid')
            pid_value = int(pid) if isinstance(pid, int) or (isinstance(pid, str) and pid.isdigit()) else None
            expected_start_time = metadata.get('process_start_time')
            expected_start_time_value = (
                expected_start_time if isinstance(expected_start_time, (int, float)) else None
            )
            if pid_value is not None and not self._is_process_alive(pid_value, expected_start_time_value):
                return True
            return self._is_lock_stale_by_mtime(lock_path, now)
        return self._is_lock_stale_by_mtime(lock_path, now)

    def _is_lock_stale_by_mtime(self, lock_path: str, now: float) -> bool:
        """Check lock staleness based on the file's modification time."""
        try:
            mtime = os.path.getmtime(lock_path)
            return now - mtime > self.LOCK_STALE_SECONDS
        except Exception as e:
            logger.warning(f"Cache lock stat error: {e}")
            return False

    def _remove_lock_file(self, lock_path: str, reason: str = "") -> bool:
        """Remove a lock file."""
        try:
            os.remove(lock_path)
            if reason:
                logger.info(f"Cache lock removed: {lock_path} ({reason})")
            return True
        except FileNotFoundError:
            return True
        except Exception as e:
            logger.warning(f"Cache lock remove error: {e}")
            return False

    def _acquire_lock(self, cache_key: str, timeout_sec: Optional[float] = None) -> Optional[str]:
        """Acquire the cache lock (safe across processes)."""
        if not self.enabled:
            return None

        wait_timeout_sec = self.LOCK_TIMEOUT_SEC if timeout_sec is None else max(float(timeout_sec), 0.0)
        lock_path = self._get_lock_path(cache_key)
        deadline = time.monotonic() + wait_timeout_sec

        while True:
            try:
                fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
                if not self._write_lock_metadata(fd, lock_path):
                    return None
                return lock_path
            except FileExistsError:
                if self._is_lock_stale(lock_path):
                    removed = self._remove_lock_file(lock_path, "stale lock")
                    if removed:
                        continue
                if time.monotonic() >= deadline:
                    logger.warning(f"Cache lock timeout ({wait_timeout_sec:.1f}s): {lock_path}")
                    return None
                time.sleep(self.LOCK_POLL_INTERVAL_SEC)
            except Exception as e:
                logger.warning(f"Cache lock error: {e}")
                return None
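
    # Minimal usage sketch for the internal lock API (hypothetical key; the
    # public methods below follow this same pattern):
    #     lock_path = self._acquire_lock(cache_key)
    #     if lock_path:
    #         try:
    #             ...  # work on the cached file
    #         finally:
    #             self._release_lock(lock_path)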

    def _acquire_lock_with_wait(
            self,
            cache_key: str,
            timeout_sec: Optional[float] = None
    ) -> Tuple[Optional[str], int]:
        """Acquire the cache lock and return the wait time in milliseconds."""
        start_time = time.monotonic()
        lock_path = self._acquire_lock(cache_key, timeout_sec=timeout_sec)
        lock_wait_ms = max(int((time.monotonic() - start_time) * 1000), 0)
        return lock_path, lock_wait_ms

    def _release_lock(self, lock_path: Optional[str]) -> None:
        """Release the cache lock."""
        if not lock_path:
            return
        self._remove_lock_file(lock_path)

    def is_cached(self, url: str) -> Tuple[bool, str]:
        """
        Check whether a material is already cached.

        Args:
            url: material URL

        Returns:
            (whether cached, cache file path)
        """
        if not self.enabled:
            return False, ''

        cache_path = self.get_cache_path(url)
        exists = os.path.exists(cache_path) and os.path.getsize(cache_path) > 0
        return exists, cache_path

    def _is_cache_file_ready(self, cache_path: str) -> bool:
        """Whether the cache file is ready (exists and is non-empty)."""
        try:
            return os.path.exists(cache_path) and os.path.getsize(cache_path) > 0
        except Exception:
            return False

    def _copy_cache_to_dest(self, cache_path: str, dest: str) -> Tuple[bool, int]:
        """Copy the cached file to the destination; return success and the file size."""
        try:
            shutil.copy2(cache_path, dest)
            try:
                os.utime(cache_path, None)
            except Exception as e:
                logger.debug(f"Failed to update cache access time: {e}")
            file_size = os.path.getsize(dest) if os.path.exists(dest) else 0
            return True, file_size
        except Exception as e:
            logger.warning(f"Failed to copy from cache: {e}")
            return False, 0

    def get_or_download(
            self,
            url: str,
            dest: str,
            timeout: int = 300,
            max_retries: int = 5
    ) -> bool:
        """Backward-compatible interface: returns whether the download succeeded."""
        result, _ = self.get_or_download_with_metrics(
            url=url,
            dest=dest,
            timeout=timeout,
            max_retries=max_retries,
        )
        return result

    def get_or_download_with_metrics(
            self,
            url: str,
            dest: str,
            timeout: int = 300,
            max_retries: int = 5
    ) -> Tuple[bool, Dict[str, Any]]:
        """
        Fetch a material from the cache, downloading and caching it on a miss,
        and return key metrics alongside the result.

        Args:
            url: material URL
            dest: destination path (inside the task working directory)
            timeout: download timeout in seconds
            max_retries: maximum number of retries

        Returns:
            (success, metrics dict)
        """
        metrics: Dict[str, Any] = {
            "lock_wait_ms": 0,
            "lock_acquired": False,
            "cache_path_used": "unknown",
        }

        # Make sure the destination directory exists
        dest_dir = os.path.dirname(dest)
        if dest_dir:
            os.makedirs(dest_dir, exist_ok=True)

        # When the cache is disabled, download directly
        if not self.enabled:
            result = storage.download_file(url, dest, max_retries=max_retries, timeout=timeout)
            metrics["cache_path_used"] = "direct"
            return result, metrics

        cache_key = _extract_cache_key(url)
        cache_path = self.get_cache_path(url)

        def _try_serve_from_cache(log_prefix: str, delete_on_failure: bool = False) -> bool:
            if not self._is_cache_file_ready(cache_path):
                return False
            copied, file_size = self._copy_cache_to_dest(cache_path, dest)
            if copied:
                metrics["cache_path_used"] = "cache"
                logger.info(f"{log_prefix}: {url[:80]}... -> {dest} ({file_size} bytes)")
                return True
            if delete_on_failure:
                try:
                    os.remove(cache_path)
                except Exception:
                    pass
            return False

        # Fast path: serve a ready cache file without taking the lock
        if _try_serve_from_cache("Cache hit"):
            return True, metrics

        lock_path, lock_wait_ms = self._acquire_lock_with_wait(
            cache_key,
            timeout_sec=self.DOWNLOAD_LOCK_TIMEOUT_SEC,
        )
        metrics["lock_wait_ms"] = lock_wait_ms
        if not lock_path:
            if _try_serve_from_cache("Cache hit after lock timeout"):
                return True, metrics
            logger.warning(f"Cache lock unavailable, downloading without cache: {url[:80]}...")
            result = storage.download_file(url, dest, max_retries=max_retries, timeout=timeout)
            metrics["cache_path_used"] = "direct"
            return result, metrics

        metrics["lock_acquired"] = True

        try:
            if _try_serve_from_cache("Cache hit", delete_on_failure=True):
                return True, metrics

            # Cache miss: download into the cache directory
            logger.debug(f"Cache miss: {url[:80]}...")

            # Download to a temporary file first (unique name to avoid concurrent overwrites)
            temp_cache_path = os.path.join(
                self.cache_dir,
                f"{cache_key}.{uuid.uuid4().hex}.downloading"
            )
            try:
                if not storage.download_file(url, temp_cache_path, max_retries=max_retries, timeout=timeout):
                    # Download failed; clean up the temporary file
                    if os.path.exists(temp_cache_path):
                        os.remove(temp_cache_path)
                    return False, metrics

                if not os.path.exists(temp_cache_path) or os.path.getsize(temp_cache_path) <= 0:
                    if os.path.exists(temp_cache_path):
                        os.remove(temp_cache_path)
                    return False, metrics

                # Download succeeded; atomically replace the cache file
                os.replace(temp_cache_path, cache_path)

                # Copy to the destination path
                if not _try_serve_from_cache("Downloaded and cached", delete_on_failure=False):
                    return False, metrics

                # Check whether the cache needs cleanup
                if self.max_size_bytes > 0:
                    self._cleanup_if_needed()

                return True, metrics

            except Exception as e:
                logger.error(f"Cache download error: {e}")
                # Clean up the temporary file
                if os.path.exists(temp_cache_path):
                    try:
                        os.remove(temp_cache_path)
                    except Exception:
                        pass
                return False, metrics
        finally:
            self._release_lock(lock_path)
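
    # A minimal sketch of how a caller might consume the metrics. The tracing
    # integration mentioned in the commit message lives in the caller; the span
    # object and its set_attribute method are assumptions for illustration:
    #     ok, m = cache.get_or_download_with_metrics(url, dest)
    #     span.set_attribute("cache.lock_wait_ms", m["lock_wait_ms"])
    #     span.set_attribute("cache.lock_acquired", m["lock_acquired"])
    #     span.set_attribute("cache.path_used", m["cache_path_used"])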

    def add_to_cache(self, url: str, source_path: str) -> bool:
        """
        Add a local file to the cache.

        Args:
            url: the corresponding URL (used to derive the cache key)
            source_path: local file path

        Returns:
            Whether the operation succeeded
        """
        if not self.enabled:
            return False

        if not os.path.exists(source_path):
            logger.warning(f"Source file not found for cache: {source_path}")
            return False

        cache_key = _extract_cache_key(url)
        lock_path = self._acquire_lock(cache_key)
        if not lock_path:
            logger.warning(f"Cache lock unavailable for adding: {url[:80]}...")
            return False

        try:
            cache_path = self.get_cache_path(url)

            # Copy to a temporary file first
            temp_cache_path = os.path.join(
                self.cache_dir,
                f"{cache_key}.{uuid.uuid4().hex}.adding"
            )

            shutil.copy2(source_path, temp_cache_path)

            # Atomic replace
            os.replace(temp_cache_path, cache_path)

            # Refresh the access time
            os.utime(cache_path, None)

            logger.info(f"Added to cache: {url[:80]}... <- {source_path}")

            # Check whether cleanup is needed
            if self.max_size_bytes > 0:
                self._cleanup_if_needed()

            return True

        except Exception as e:
            logger.error(f"Failed to add to cache: {e}")
            if 'temp_cache_path' in locals() and os.path.exists(temp_cache_path):
                try:
                    os.remove(temp_cache_path)
                except Exception:
                    pass
            return False
        finally:
            self._release_lock(lock_path)
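
    # Example (hypothetical paths): pre-seed the cache with a locally produced
    # file so that a later get_or_download for the same URL is a cache hit:
    #     cache.add_to_cache("https://cdn.example.com/v/a.mp4", "/tmp/render/a.mp4")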

    def _cleanup_if_needed(self) -> None:
        """
        Check the cache size and clean up if needed (LRU policy).

        When the cache exceeds its size limit, delete the least recently
        accessed files first.
        """
        if self.max_size_bytes <= 0:
            return

        try:
            # Collect all cache files and their metadata
            cache_files = []
            total_size = 0

            for filename in os.listdir(self.cache_dir):
                if filename.endswith('.downloading') or filename.endswith('.lock'):
                    continue
                file_path = os.path.join(self.cache_dir, filename)
                if os.path.isfile(file_path):
                    stat = os.stat(file_path)
                    cache_files.append({
                        'path': file_path,
                        'size': stat.st_size,
                        'atime': stat.st_atime
                    })
                    total_size += stat.st_size

            # Nothing to do if we are under the limit
            if total_size <= self.max_size_bytes:
                return

            # Sort by access time (least recently accessed first)
            cache_files.sort(key=lambda x: x['atime'])

            # Delete files until we are below 80% of the limit
            target_size = int(self.max_size_bytes * 0.8)
            deleted_count = 0

            for file_info in cache_files:
                if total_size <= target_size:
                    break
                # Derive the cache_key from the filename and check for a lock
                # (a live lock means the file is currently in use)
                filename = os.path.basename(file_info['path'])
                cache_key = os.path.splitext(filename)[0]
                lock_path = self._get_lock_path(cache_key)
                if os.path.exists(lock_path):
                    if self._is_lock_stale(lock_path):
                        self._remove_lock_file(lock_path, "cleanup stale lock")
                    else:
                        # The file is in use by another task; skip it
                        logger.debug(f"Cache cleanup: skipping locked file {filename}")
                        continue
                try:
                    os.remove(file_info['path'])
                    total_size -= file_info['size']
                    deleted_count += 1
                except Exception as e:
                    logger.warning(f"Failed to delete cache file: {e}")

            if deleted_count > 0:
                logger.info(f"Cache cleanup: deleted {deleted_count} files, current size: {total_size / (1024*1024*1024):.2f} GB")

        except Exception as e:
            logger.warning(f"Cache cleanup error: {e}")
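
    # Worked example of the 80% target: with max_size_gb=10 the limit is
    # 10 * 1024**3 bytes. Once total_size exceeds that, eviction runs until the
    # cache drops below int(10 * 1024**3 * 0.8), i.e. 8 GiB, leaving headroom so
    # cleanup does not retrigger on every subsequent download.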

    def clear(self) -> None:
        """Clear the entire cache."""
        if not self.enabled:
            return

        try:
            if os.path.exists(self.cache_dir):
                shutil.rmtree(self.cache_dir)
            os.makedirs(self.cache_dir, exist_ok=True)
            logger.info("Cache cleared")
        except Exception as e:
            logger.error(f"Failed to clear cache: {e}")

    def get_stats(self) -> dict:
        """
        Get cache statistics.

        Returns:
            Dict with cache statistics
        """
        if not self.enabled or not os.path.exists(self.cache_dir):
            return {'enabled': False, 'file_count': 0, 'total_size_mb': 0}

        file_count = 0
        total_size = 0

        for filename in os.listdir(self.cache_dir):
            if filename.endswith('.downloading') or filename.endswith('.lock'):
                continue
            file_path = os.path.join(self.cache_dir, filename)
            if os.path.isfile(file_path):
                file_count += 1
                total_size += os.path.getsize(file_path)

        return {
            'enabled': True,
            'cache_dir': self.cache_dir,
            'file_count': file_count,
            'total_size_mb': round(total_size / (1024 * 1024), 2),
            'max_size_gb': self.max_size_bytes / (1024 * 1024 * 1024) if self.max_size_bytes > 0 else 0
        }
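

# Minimal offline demo (an assumption, not part of the original module: it only
# exercises the parts of the API that need no storage backend or network):
if __name__ == "__main__":
    import tempfile

    logging.basicConfig(level=logging.INFO)
    with tempfile.TemporaryDirectory() as tmp_dir:
        demo_cache = MaterialCache(tmp_dir, enabled=True, max_size_gb=1)
        # A signed and an unsigned URL for the same object map to one cache path.
        url_signed = "https://cdn.example.com/v/a.mp4?sig=AAA"  # hypothetical URL
        url_plain = "https://cdn.example.com/v/a.mp4"
        assert demo_cache.get_cache_path(url_signed) == demo_cache.get_cache_path(url_plain)
        print(demo_cache.is_cached(url_signed))  # (False, '<cache path>') before any download
        print(demo_cache.get_stats())            # {'enabled': True, 'file_count': 0, ...}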