init datamate

This commit is contained in:
Dallas98
2025-10-21 23:00:48 +08:00
commit 1c97afed7d
692 changed files with 135442 additions and 0 deletions

View File

@@ -0,0 +1,6 @@
from .cmd_task_scheduler import CommandScheduler
from .func_task_scheduler import CallableScheduler
# Package-level scheduler instances, created once at import time.
# NOTE: each construction runs TaskScheduler.__init__, which (re)registers the
# SIGINT/SIGTERM handlers, so the instance created last owns the installed handler.

# Scheduler for command (subprocess) tasks, built with max_concurrent=5.
cmd_scheduler = CommandScheduler(max_concurrent=5)
# Scheduler for callable tasks, built with max_concurrent=5.
func_scheduler = CallableScheduler(max_concurrent=5)

View File

@@ -0,0 +1,214 @@
import asyncio
from datetime import datetime
from typing import Optional, List
from loguru import logger
from .scheduler import Task, TaskStatus, TaskResult, TaskScheduler
class CommandTask(Task):
    """Task wrapper that runs an external command as an asyncio subprocess.

    The command's decoded output and exit status are stored on ``stdout``,
    ``stderr`` and ``return_code``; ``to_result`` packs them into ``result``.

    Args:
        task_id: Identifier of the task.
        command: Command line to run.
        shell: When true the command is run through the shell; otherwise it is
            split into an argument vector and executed directly.
        timeout: Optional limit in seconds for the command; a falsy value
            (``None`` or ``0``) disables the limit.
        *args: Forwarded to ``Task``.
        **kwargs: Forwarded to ``Task`` and later passed on to the asyncio
            subprocess factory.
    """

    def __init__(self, task_id: str, command: str, shell: bool = True,
                 timeout: Optional[int] = None, *args, **kwargs):
        super().__init__(task_id, *args, **kwargs)
        self.command = command
        self.shell = shell
        self.timeout = timeout
        self.stdout = None
        self.stderr = None
        self.return_code = None
        self._process = None

    def start(self) -> 'CommandTask':
        """Start the command in a background asyncio task.

        Only a PENDING task is started; calling this again is a no-op.
        Must be called while an event loop is running.

        Returns:
            This task, to allow chaining.
        """
        if self.status == TaskStatus.PENDING:
            self.status = TaskStatus.RUNNING
            self.started_at = datetime.now()
            self._task = asyncio.create_task(self._execute())
        return self

    async def _spawn(self):
        """Create the subprocess with stdout and stderr captured through pipes."""
        pipe = asyncio.subprocess.PIPE
        if self.shell:
            return await asyncio.create_subprocess_shell(
                self.command, stdout=pipe, stderr=pipe, **self.kwargs
            )
        import os
        import shlex

        # shlex keeps quoted arguments together ("my file.txt" stays one
        # argument), which a plain str.split() would break apart. POSIX rules
        # are disabled on Windows so backslashes in paths are left untouched.
        argv = shlex.split(self.command, posix=(os.name != "nt"))
        return await asyncio.create_subprocess_exec(
            *argv, stdout=pipe, stderr=pipe, **self.kwargs
        )

    async def _stop_process(self, grace: float = 5.0):
        """Terminate the subprocess, escalating to kill after ``grace`` seconds."""
        process = self._process
        if process is None or process.returncode is not None:
            return
        try:
            process.terminate()
        except ProcessLookupError:
            # The process exited on its own in the meantime.
            return
        try:
            await asyncio.wait_for(process.wait(), timeout=grace)
        except asyncio.TimeoutError:
            try:
                process.kill()
            except ProcessLookupError:
                return
            await process.wait()

    async def _execute(self):
        """Run the command and record its outcome on this task.

        Status ends up COMPLETED for exit code 0, FAILED for a non-zero exit
        code, a timeout or any spawn error, and CANCELLED when the task was
        cancelled. Cancellation is absorbed here rather than re-raised, so
        awaiting the asyncio task returns normally once cleanup is done.
        """
        try:
            self.status = TaskStatus.RUNNING
            self.started_at = datetime.now()
            process = await self._spawn()
            self._process = process
            try:
                if self.timeout:
                    stdout, stderr = await asyncio.wait_for(
                        process.communicate(),
                        timeout=self.timeout
                    )
                else:
                    stdout, stderr = await process.communicate()
            except asyncio.TimeoutError:
                await self._stop_process()
                self.status = TaskStatus.FAILED
                self.stderr = f"Command timed out after {self.timeout} seconds"
                self.error = self.stderr
            else:
                # Undecodable bytes must not turn a finished command into a
                # failure, so they are replaced instead of raising.
                self.stdout = stdout.decode(errors="replace") if stdout else ""
                self.stderr = stderr.decode(errors="replace") if stderr else ""
                self.return_code = process.returncode
                if self._cancelled:
                    self.status = TaskStatus.CANCELLED
                elif process.returncode == 0:
                    self.status = TaskStatus.COMPLETED
                else:
                    self.status = TaskStatus.FAILED
                    self.error = (
                        self.stderr.strip()
                        or f"Command exited with return code {process.returncode}"
                    )
        except asyncio.CancelledError:
            await self._stop_process()
            self.status = TaskStatus.CANCELLED
            self._cancelled = True
        except Exception as e:
            self.status = TaskStatus.FAILED
            self.stderr = str(e)
            self.error = str(e)
        finally:
            self.completed_at = datetime.now()

    def cancel(self) -> bool:
        """Request cancellation of a running command.

        Returns:
            True when a stop request was delivered, False when the task is
            not running or the process could not be signalled.
        """
        if self.status != TaskStatus.RUNNING:
            return False
        if self._process is None:
            # The subprocess has not been spawned yet, so there is nothing to
            # signal; cancel the coroutine instead so the command never starts.
            if self._task is not None and not self._task.done():
                self._cancelled = True
                self._task.cancel()
                return True
            return False
        try:
            # Try a graceful stop first.
            self._process.terminate()
            self._cancelled = True
            return True
        except Exception:
            # Graceful termination failed; fall back to a forced kill.
            try:
                self._process.kill()
                self._cancelled = True
                return True
            except Exception:
                return False

    def to_result(self) -> TaskResult:
        """Return a ``TaskResult`` whose ``result`` holds the command details."""
        self.result = {
            "command": self.command,
            "stdout": self.stdout,
            "stderr": self.stderr,
            "return_code": self.return_code,
        }
        return super().to_result()
class CommandScheduler(TaskScheduler):
    """Scheduler that runs shell/exec commands as tracked ``CommandTask`` objects.

    At most ``max_concurrent`` commands run at the same time. The status query
    helpers (``get_task_status``, ``get_all_tasks``, ``get_tasks_by_status``)
    are inherited unchanged from ``TaskScheduler``.
    """

    def __init__(self, max_concurrent: int = 5):
        super().__init__(max_concurrent)

    async def _start_in_slot(self, task) -> None:
        """Start ``task`` once a concurrency slot is free; hold the slot until it ends."""
        try:
            await self.semaphore.acquire()
        except asyncio.CancelledError:
            # The submitter was cancelled while queued: record that the
            # command never ran instead of leaving it PENDING forever.
            task.status = TaskStatus.CANCELLED
            task.completed_at = datetime.now()
            raise
        try:
            task.start()
            running = task.get()
        except BaseException:
            self.semaphore.release()
            raise
        if running is None:
            # Nothing was started, so nothing will ever release the slot.
            self.semaphore.release()
        else:
            # The slot is released when the command finishes, not when it
            # starts; releasing right after start() would limit nothing.
            running.add_done_callback(lambda _finished: self.semaphore.release())

    async def submit(self, task_id, command: str, shell: bool = True,
                     timeout: Optional[int] = None, **kwargs) -> str:
        """Register a command task and start it.

        When ``max_concurrent`` commands are already running this call waits
        until one of them finishes before starting the new one.

        Args:
            task_id: Identifier under which the task is stored.
            command: Command line to run.
            shell: Whether to run the command through the shell.
            timeout: Optional execution limit in seconds.
            **kwargs: Forwarded to ``CommandTask``.

        Returns:
            The ``task_id`` that was passed in.
        """
        task = CommandTask(task_id, command, shell, timeout, **kwargs)
        self.tasks[task_id] = task
        await self._start_in_slot(task)
        logger.info(f"命令任务 {task_id} 已提交并开始执行")
        return task_id

    def cancel_task(self, task_id: str) -> bool:
        """Cancel a running command task.

        Returns:
            True when the task was asked to stop or when ``task_id`` is
            unknown; False when the task exists but is not running or could
            not be stopped.
        """
        task = self.tasks.get(task_id)
        if not task:
            # NOTE(review): TaskScheduler.cancel_task reports False for an
            # unknown id; this override reports True. Presumably "nothing left
            # to cancel" is meant to count as success here - confirm with callers.
            return True
        if task.status == TaskStatus.RUNNING:
            cancelled = task.cancel()
            if cancelled:
                logger.info(f"命令任务 {task_id} 已取消")
            return cancelled
        return False

    async def wait_for_task(self, task_id: str, timeout: Optional[float] = None) -> TaskResult:
        """Wait until a command task has finished and return its result.

        Args:
            task_id: Identifier of the task to wait for.
            timeout: Maximum number of seconds to wait; ``None`` waits
                indefinitely.

        Returns:
            The task's ``TaskResult``.

        Raises:
            ValueError: If no task is registered under ``task_id``.
            TimeoutError: If the task is still running after ``timeout``
                seconds. The command itself keeps running.
        """
        task = self.tasks.get(task_id)
        if not task:
            raise ValueError(f"任务 {task_id} 不存在")
        if task.status in [TaskStatus.COMPLETED, TaskStatus.FAILED, TaskStatus.CANCELLED]:
            return task.to_result()
        running = task.get()
        if running is not None:
            try:
                # shield() keeps a timed-out waiter from cancelling the
                # command: wait_for cancels whatever it is awaiting on timeout.
                await asyncio.wait_for(asyncio.shield(running), timeout=timeout)
            except asyncio.TimeoutError as exc:
                raise TimeoutError(f"任务 {task_id} 超时") from exc
        return task.to_result()

    async def shutdown(self):
        """Cancel every running command and wait briefly for them to stop."""
        logger.info("正在关闭命令调度器...")
        running_tasks = [
            task for task in self.tasks.values()
            if task.status == TaskStatus.RUNNING
        ]
        for task in running_tasks:
            logger.info(f"取消运行中的命令任务: {task.task_id}")
            task.cancel()
        # Give the terminated commands up to five seconds in total to finish
        # their cleanup before reporting the scheduler as closed.
        waiting = {
            task.get(): task
            for task in running_tasks
            if task.get() is not None and not task.get().done()
        }
        if waiting:
            _, unfinished = await asyncio.wait(set(waiting), timeout=5.0)
            for straggler in unfinished:
                logger.warning(f"命令任务 {waiting[straggler].task_id} 无法正常停止")
        logger.info("命令调度器已关闭")

View File

@@ -0,0 +1,133 @@
import asyncio
from datetime import datetime
from typing import Callable, Optional, List
from loguru import logger
from .scheduler import TaskStatus, TaskResult, Task, TaskScheduler
class CallableTask(Task):
    """Task wrapper around a callable whose return value is awaited."""

    def __init__(self, task_id: str, func: Callable, *args, **kwargs):
        super().__init__(task_id, *args, **kwargs)
        self.func = func

    def start(self) -> 'CallableTask':
        """Schedule the callable on the running event loop.

        A task that is not PENDING is left untouched. Returns this task.
        """
        if self.status != TaskStatus.PENDING:
            return self
        self.status = TaskStatus.RUNNING
        self.started_at = datetime.now()
        self._task = asyncio.create_task(self._execute())
        return self

    async def _execute(self):
        """Await the callable and record how it ended.

        The outcome is stored on ``status`` together with ``result`` on
        success or ``error`` on failure; cancellation is absorbed here and
        recorded as CANCELLED. ``completed_at`` is always stamped.
        """
        try:
            outcome = await self.func(*self.args, **self.kwargs)
        except asyncio.CancelledError:
            self.status = TaskStatus.CANCELLED
            self._cancelled = True
        except Exception as exc:
            self.status = TaskStatus.FAILED
            self.error = str(exc)
        else:
            self.result = outcome
            self.status = TaskStatus.COMPLETED
        finally:
            self.completed_at = datetime.now()

    def cancel(self) -> bool:
        """Cancel the underlying asyncio task.

        Returns True when a cancellation was requested, False when the task
        was never started or has already finished.
        """
        running = self._task
        if not running or running.done():
            return False
        running.cancel()
        return True
class CallableScheduler(TaskScheduler):
    """Scheduler that runs awaitable callables as tracked ``CallableTask`` objects.

    At most ``max_concurrent`` callables run at the same time. The status
    query helpers and ``cancel_task`` are inherited unchanged from
    ``TaskScheduler``.
    """

    def __init__(self, max_concurrent: int = 10):
        super().__init__(max_concurrent)

    async def _start_in_slot(self, task) -> None:
        """Start ``task`` once a concurrency slot is free; hold the slot until it ends."""
        try:
            await self.semaphore.acquire()
        except asyncio.CancelledError:
            # The submitter was cancelled while queued: record that the task
            # never ran instead of leaving it PENDING forever.
            task.status = TaskStatus.CANCELLED
            task.completed_at = datetime.now()
            raise
        try:
            task.start()
            running = task.get()
        except BaseException:
            self.semaphore.release()
            raise
        if running is None:
            # Nothing was started, so nothing will ever release the slot.
            self.semaphore.release()
        else:
            # The slot is released when the task finishes, not when it
            # starts; releasing right after start() would limit nothing.
            running.add_done_callback(lambda _finished: self.semaphore.release())

    async def submit(self, task_id, func: Callable, *args, **kwargs) -> str:
        """Register a callable task and start it.

        When ``max_concurrent`` tasks are already running this call waits
        until one of them finishes before starting the new one.

        Args:
            task_id: Identifier under which the task is stored.
            func: Callable whose return value is awaited by the task.
            *args: Positional arguments passed to ``func``.
            **kwargs: Keyword arguments passed to ``func``.

        Returns:
            The ``task_id`` that was passed in.
        """
        task = CallableTask(task_id, func, *args, **kwargs)
        self.tasks[task_id] = task
        await self._start_in_slot(task)
        logger.info(f"任务 {task_id} 已提交并开始执行")
        return task_id

    async def wait_for_task(self, task_id: str, timeout: Optional[float] = None) -> TaskResult:
        """Wait until a task has finished and return its result.

        Args:
            task_id: Identifier of the task to wait for.
            timeout: Maximum number of seconds to wait; ``None`` waits
                indefinitely.

        Returns:
            The task's ``TaskResult``.

        Raises:
            ValueError: If no task is registered under ``task_id``.
            TimeoutError: If the task is still running after ``timeout``
                seconds. The task itself keeps running.
        """
        task = self.tasks.get(task_id)
        if not task:
            raise ValueError(f"任务 {task_id} 不存在")
        if task.status in [TaskStatus.COMPLETED, TaskStatus.FAILED, TaskStatus.CANCELLED]:
            return task.to_result()
        running = task.get()
        if running:
            try:
                # shield() keeps a timed-out waiter from cancelling the job:
                # wait_for cancels whatever it is awaiting when time runs out.
                await asyncio.wait_for(asyncio.shield(running), timeout=timeout)
            except asyncio.TimeoutError as exc:
                raise TimeoutError(f"任务 {task_id} 超时") from exc
        return task.to_result()

    async def shutdown(self):
        """Cancel every running task and wait briefly for them to stop."""
        logger.info("正在关闭调度器...")
        running_tasks = [
            task for task in self.tasks.values()
            if task.status == TaskStatus.RUNNING
        ]
        for task in running_tasks:
            logger.info(f"取消运行中的任务: {task.task_id}")
            task.cancel()
        # Wait for all cancelled tasks together so the whole shutdown is
        # bounded by five seconds rather than five seconds per task.
        waiting = {
            task.get(): task
            for task in running_tasks
            if task.get() and not task.get().done()
        }
        if waiting:
            _, unfinished = await asyncio.wait(set(waiting), timeout=5.0)
            for straggler in unfinished:
                logger.warning(f"任务 {waiting[straggler].task_id} 无法正常停止")
        logger.info("调度器已关闭")

View File

@@ -0,0 +1,160 @@
# Task status enum, task result record, and the Task / TaskScheduler base classes
import asyncio
import signal
import sys
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Any, Optional, Dict, List
from loguru import logger
class TaskStatus(Enum):
    """Lifecycle states a scheduled task can be in."""
    PENDING = "pending"  # waiting to be executed
    RUNNING = "running"  # currently running
    COMPLETED = "completed"  # finished successfully
    FAILED = "failed"  # execution failed
    CANCELLED = "cancelled"  # cancelled
@dataclass
class TaskResult:
    """Data record describing a task's state, as produced by ``Task.to_result``."""
    # Identifier of the task this record describes.
    task_id: str
    # Status of the task when the record was built.
    status: TaskStatus
    # Task-specific payload; None unless the task stored one.
    result: Any = None
    # Error text; None unless the task recorded one.
    error: Optional[str] = None
    # Timestamps copied from the task; None when not set.
    created_at: Optional[datetime] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    # Progress value copied from the task; defaults to 0.0.
    progress: float = 0.0
class Task:
    """Base record for a schedulable unit of work.

    Holds identity, arguments, lifecycle timestamps and outcome fields.
    ``start``, ``_execute`` and ``cancel`` are placeholders that subclasses
    override; here they do nothing and return ``None``.
    """

    def __init__(self, task_id: str, *args, **kwargs):
        # Identity and call arguments.
        self.task_id = task_id
        self.args = args
        self.kwargs = kwargs
        # Lifecycle bookkeeping.
        self.status = TaskStatus.PENDING
        self.created_at = datetime.now()
        self.started_at = None
        self.completed_at = None
        self.progress = 0.0
        # Outcome.
        self.result = None
        self.error = None
        # Execution handle (an asyncio.Task once started) and cancel flag.
        self._task = None
        self._cancelled = False

    def get(self):
        """Return the underlying asyncio task, or ``None`` if none was created."""
        return self._task

    def start(self) -> 'Task':
        """Start the task (placeholder; does nothing in the base class)."""
        return None

    async def _execute(self):
        """Run the task body (placeholder; does nothing in the base class)."""
        return None

    def cancel(self) -> bool:
        """Cancel the task (placeholder; does nothing in the base class)."""
        return None

    def to_result(self) -> TaskResult:
        """Build a ``TaskResult`` from this task's current field values."""
        copied_fields = (
            "task_id",
            "status",
            "result",
            "error",
            "created_at",
            "started_at",
            "completed_at",
            "progress",
        )
        snapshot = {name: getattr(self, name) for name in copied_fields}
        return TaskResult(**snapshot)
class TaskScheduler:
    """Base class for asyncio task schedulers.

    Keeps a registry of tasks by id, owns the semaphore subclasses use to
    limit concurrency, and installs SIGINT/SIGTERM handlers that shut the
    scheduler down before the process exits. ``submit``, ``wait_for_task``
    and ``shutdown`` are placeholders for subclasses to override.

    Args:
        max_concurrent: Size of the concurrency semaphore.
    """

    def __init__(self, max_concurrent: int = 10):
        self.max_concurrent = max_concurrent
        self.tasks: Dict[str, Task] = {}
        self.semaphore = asyncio.Semaphore(max_concurrent)
        # State of a signal-triggered shutdown: the flag lets a repeated
        # signal force an immediate exit, and the task handle is kept so the
        # running shutdown is not garbage collected.
        self._shutting_down = False
        self._shutdown_task = None
        # Register the signal handlers.
        try:
            signal.signal(signal.SIGINT, self._signal_handler)
            signal.signal(signal.SIGTERM, self._signal_handler)
        except (ValueError, AttributeError):
            # Registration may be unsupported in this context (for example
            # outside the main thread or on some platforms); run without it.
            pass

    def _signal_handler(self, signum, frame):
        """Shut the scheduler down on SIGINT/SIGTERM, then exit the process.

        When an event loop is running, ``shutdown()`` is scheduled on it and
        the process exits only after it has finished. Without a running loop,
        or on a second signal during cleanup, the process exits immediately.
        """
        logger.info(f"收到信号 {signum},正在清理任务...")
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            loop = None
        if loop is None or self._shutting_down:
            # Either nothing can be awaited from here, or the user asked
            # again while cleanup was in progress: stop right away.
            sys.exit(0)
        self._shutting_down = True
        # Exiting straight from the handler would tear the loop down before
        # shutdown() got to run, so the exit is deferred to its completion.
        # call_soon_threadsafe also wakes a loop that is idle in its selector.
        loop.call_soon_threadsafe(self._begin_shutdown, loop)

    def _begin_shutdown(self, loop):
        """Start ``shutdown()`` on ``loop`` and arrange to exit when it ends."""
        self._shutdown_task = loop.create_task(self.shutdown())
        self._shutdown_task.add_done_callback(self._exit_after_shutdown)

    def _exit_after_shutdown(self, finished):
        """Log a failed shutdown, then raise ``SystemExit`` to stop the loop."""
        if not finished.cancelled() and finished.exception() is not None:
            logger.error(f"关闭调度器时出错: {finished.exception()}")
        sys.exit(0)

    async def submit(self, task_id, task, *args, **kwargs) -> str:
        """Submit a task (placeholder; subclasses provide the implementation)."""
        pass

    def get_task_status(self, task_id: str) -> Optional[TaskResult]:
        """Return the result record of ``task_id``, or ``None`` if it is unknown."""
        task = self.tasks.get(task_id)
        if task:
            return task.to_result()
        return None

    def get_all_tasks(self) -> List[TaskResult]:
        """Return the result records of all registered tasks."""
        return [task.to_result() for task in self.tasks.values()]

    def cancel_task(self, task_id: str) -> bool:
        """Cancel a running task.

        Returns:
            Whatever the task's ``cancel()`` reports for a running task;
            False when the task is unknown or not running.
        """
        task = self.tasks.get(task_id)
        if task and task.status == TaskStatus.RUNNING:
            cancelled = task.cancel()
            if cancelled:
                logger.info(f"任务 {task_id} 已取消")
            return cancelled
        return False

    def get_tasks_by_status(self, status: TaskStatus) -> List[TaskResult]:
        """Return the result records of all tasks currently in ``status``."""
        return [
            task.to_result()
            for task in self.tasks.values()
            if task.status == status
        ]

    async def wait_for_task(self, task_id: str, timeout: Optional[float] = None) -> TaskResult:
        """Wait for a task to finish (placeholder; subclasses implement it)."""
        pass

    async def shutdown(self):
        """Shut the scheduler down (placeholder; subclasses implement it)."""
        pass

    def get_statistics(self) -> Dict[str, int]:
        """Count the registered tasks per status.

        Returns:
            A dict keyed by each status value ("pending", "running",
            "completed", "failed", "cancelled") plus "total".
        """
        # Deriving the keys from the enum keeps the counters in step with it.
        stats = {status.value: 0 for status in TaskStatus}
        for task in self.tasks.values():
            stats[task.status.value] += 1
        stats["total"] = len(self.tasks)
        return stats