feature: add data-evaluation

* feature: add evaluation task management function

* feature: add evaluation task detail page

* fix: delete duplicate definition of table t_model_config

* refactor: rename package synthesis to ratio

* refactor: add eval file table and refactor related code

* fix: call large models in parallel during evaluation
Author: hefanli
Date: 2025-12-04 09:23:54 +08:00
Committed by: GitHub
Parent: 265e284fb8
Commit: 1d19cd3a62
52 changed files with 2882 additions and 1244 deletions


@@ -0,0 +1,17 @@
from enum import Enum
class BusinessErrorCode:
def __init__(self, message: str, error_code: str):
self.message = message
self.error_code = error_code
class BusinessException(RuntimeError):
def __init__(self, business_error_code: BusinessErrorCode):
self.message = business_error_code.message
self.error_code = business_error_code.error_code
super().__init__(self.message)
class BusinessErrorCodeEnum(Enum):
TASK_TYPE_ERROR = BusinessErrorCode("任务类型错误", "evaluation.0001")
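A minimal usage sketch for the new exception types; the resolve_executor helper below is hypothetical, but it mirrors how the evaluation service raises this error:

from app.core.exception import BusinessErrorCodeEnum, BusinessException

def resolve_executor(task_type: str) -> str:
    # Hypothetical helper for illustration only
    if task_type != "QA":
        # .value unwraps the BusinessErrorCode stored on the enum member
        raise BusinessException(BusinessErrorCodeEnum.TASK_TYPE_ERROR.value)
    return "qa-executor"

try:
    resolve_executor("UNKNOWN")
except BusinessException as e:
    print(e.error_code, e.message)  # evaluation.0001 任务类型错误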


@@ -16,6 +16,11 @@ from .annotation_management import (
LabelingProject
)
from .data_evaluation import (
EvaluationTask,
EvaluationItem
)
__all__ = [
"Dataset",
"DatasetTag",
@@ -25,4 +30,6 @@ __all__ = [
"User",
"AnnotationTemplate",
"LabelingProject",
-]
+    "EvaluationTask",
+    "EvaluationItem",
+]


@@ -0,0 +1,81 @@
"""
Tables for Data Evaluation module
Derived from scripts/db/data-evaluation-init.sql
- t_de_eval_task
- t_de_eval_item
"""
import uuid
from sqlalchemy import Column, String, Text, Float, TIMESTAMP, ForeignKey, Integer
from sqlalchemy.sql import func
from app.db.session import Base
class EvaluationTask(Base):
"""评估任务表(UUID 主键) -> t_de_eval_task
Columns per data-evaluation-init.sql:
id, name, description, task_type, source_type, source_id, source_name,
status, eval_process, eval_promt, eval_config, created_at, updated_at,
created_by, updated_by
"""
__tablename__ = "t_de_eval_task"
    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
    name = Column(String(255), nullable=False, comment="Evaluation task name")
    description = Column(Text, nullable=True, comment="Evaluation task description")
    task_type = Column(String(50), nullable=False, comment="Evaluation task type: QA")
    source_type = Column(String(36), nullable=True, comment="Type of the object under evaluation: DATASET/SYNTHESIS")
    source_id = Column(String(36), nullable=True, comment="ID of the object under evaluation")
    source_name = Column(String(255), nullable=True, comment="Name of the object under evaluation")
    status = Column(String(50), server_default="PENDING", nullable=False, comment="Status: PENDING/RUNNING/COMPLETED/STOPPED/FAILED")
    eval_process = Column(Float, nullable=False, server_default="0", comment="Evaluation progress")
    eval_prompt = Column(Text, nullable=True, comment="Evaluation prompt")
    eval_config = Column(Text, nullable=True, comment="Evaluation config")
    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="Creation time")
    updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="Update time")
    created_by = Column(String(255), nullable=True, comment="Creator")
    updated_by = Column(String(255), nullable=True, comment="Updater")
class EvaluationFile(Base):
"""评估条目表(UUID 主键) -> t_de_eval_file"""
__tablename__ = "t_de_eval_file"
    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
    task_id = Column(String(36), ForeignKey('t_de_eval_task.id'), nullable=False, comment="Evaluation task ID")
    file_id = Column(String(36), ForeignKey('t_dm_dataset_files.id'), nullable=True, comment="File ID")
    file_name = Column(String(255), nullable=False, comment="File name")
    total_count = Column(Integer, nullable=False, default=0, comment="Total number of items")
    evaluated_count = Column(Integer, nullable=False, default=0, comment="Number of evaluated items")
    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="Creation time")
    updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="Update time")
    created_by = Column(String(255), nullable=True, comment="Creator")
    updated_by = Column(String(255), nullable=True, comment="Updater")
class EvaluationItem(Base):
"""评估条目表(UUID 主键) -> t_de_eval_item
Columns per data-evaluation-init.sql:
id, task_id, item_id, eval_score, eval_result, status
"""
__tablename__ = "t_de_eval_item"
    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
    task_id = Column(String(36), ForeignKey('t_de_eval_task.id'), nullable=False, comment="Evaluation task ID")
    file_id = Column(String(36), ForeignKey('t_dm_dataset_files.id'), nullable=True, comment="File ID")
    item_id = Column(String(36), nullable=False, comment="Evaluated entry ID")
    eval_content = Column(Text, nullable=True, comment="Evaluated content")
    eval_score = Column(Float, nullable=False, server_default="0", comment="Evaluation score")
    eval_result = Column(Text, nullable=True, comment="Evaluation result")
    status = Column(String(50), server_default="PENDING", nullable=False, comment="Status: PENDING/COMPLETED")
    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="Creation time")
    updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="Update time")
    created_by = Column(String(255), nullable=True, comment="Creator")
    updated_by = Column(String(255), nullable=True, comment="Updater")
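A minimal sketch of persisting a task with these models, assuming the AsyncSessionLocal factory that the evaluation service in this commit imports from app.db.session; all values are placeholders:

import json
import uuid

from app.db.models.data_evaluation import EvaluationTask
from app.db.session import AsyncSessionLocal

async def seed_demo_task() -> str:
    # status/eval_process/timestamps fall back to the column defaults above
    task_id = str(uuid.uuid4())
    task = EvaluationTask(
        id=task_id,
        name="demo-eval",
        task_type="QA",
        source_type="DATASET",
        eval_config=json.dumps({"model_id": "m-1", "dimensions": []}),
    )
    async with AsyncSessionLocal() as session:
        session.add(task)
        await session.commit()
    return task_id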


@@ -2,8 +2,9 @@ from fastapi import APIRouter
from .system.interface import router as system_router
from .annotation.interface import router as annotation_router
-from .synthesis.interface import router as ratio_router
+from .ratio.interface import router as ratio_router
from .generation.interface import router as generation_router
+from .evaluation.interface import router as evaluation_router
router = APIRouter(
prefix="/api"
@@ -13,5 +14,6 @@ router.include_router(system_router)
router.include_router(annotation_router)
router.include_router(ratio_router)
router.include_router(generation_router)
router.include_router(evaluation_router)
__all__ = ["router"]


@@ -46,8 +46,7 @@ class DatasetFileTag(BaseModel):
tags.append(tag_values)
        # If from_name is not empty, add it as a prefix
        if self.from_name:
-            tags = [f"{self.from_name} {tag}" for tag in tags]
+            tags = [f"{self.from_name}@{tag}" for tag in tags]
return tags


@@ -0,0 +1,11 @@
from fastapi import APIRouter
router = APIRouter(
prefix="/evaluation",
    tags=["evaluation"]
)
# Include sub-routers
from .evaluation import router as evaluation_router
router.include_router(evaluation_router)


@@ -0,0 +1,429 @@
import asyncio
import uuid
import json
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, func
from pydantic import ValidationError
from app.core.logging import get_logger
from app.db.models.data_evaluation import EvaluationFile
from app.db.session import get_db
from app.db.models import EvaluationTask, EvaluationItem
from app.module.evaluation.schema.evaluation import (
CreateEvaluationTaskRequest,
PagedEvaluationTaskResponse,
EvaluationTaskDetailResponse,
PagedEvaluationItemsResponse,
EvaluationItemResponse, PagedEvaluationFilesResponse, EvaluationFileResponse
)
from app.module.evaluation.schema.prompt import get_prompt
from app.module.evaluation.schema.prompt_template import PromptTemplateResponse
from app.module.evaluation.service.prompt_template_service import PromptTemplateService
from app.module.evaluation.service.evaluation import EvaluationTaskService
from app.module.shared.schema.common import StandardResponse, TaskStatus
router = APIRouter(
prefix="",
tags=["evaluation"],
)
logger = get_logger(__name__)
@router.get("/prompt-templates", response_model=StandardResponse[PromptTemplateResponse])
async def get_prompt_templates():
"""
Get all available evaluation prompt templates
Returns:
StandardResponse with list of prompt templates
"""
try:
templates = PromptTemplateService.get_prompt_templates()
return StandardResponse(
code=200,
message="Success",
data=templates
)
except Exception as e:
logger.error(f"Failed to get prompt templates: {str(e)}")
raise HTTPException(
status_code=500,
detail="Failed to retrieve prompt templates"
)
@router.post("/tasks", response_model=StandardResponse[EvaluationTaskDetailResponse], status_code=201)
async def create_evaluation_task(
request: CreateEvaluationTaskRequest,
db: AsyncSession = Depends(get_db)
):
"""
创建评估任务
Args:
request: 创建评估任务请求
db: 数据库会话
Returns:
StandardResponse[EvaluationTaskDetailResponse]: 创建的任务详情
"""
try:
        # Check whether a task with the same name already exists
existing_task = await db.execute(
select(EvaluationTask).where(EvaluationTask.name == request.name)
)
if existing_task.scalar_one_or_none():
raise HTTPException(status_code=400, detail=f"Evaluation task with name '{request.name}' already exists")
        # Create the evaluation task
task = EvaluationTask(
id=str(uuid.uuid4()),
name=request.name,
description=request.description,
task_type=request.task_type,
source_type=request.source_type,
source_id=request.source_id,
source_name=request.source_name,
eval_prompt=request.eval_prompt,
eval_config=json.dumps({
"model_id": request.eval_config.model_id,
"dimensions": request.eval_config.dimensions,
}),
status=TaskStatus.PENDING.value,
eval_process=0.0,
)
db.add(task)
# Commit first to persist the task before scheduling background work
await db.commit()
# Schedule background execution without blocking the current request
asyncio.create_task(EvaluationTaskService.run_evaluation_task(task.id))
# Refresh the task to return latest state
await db.refresh(task)
        # Convert to the response model
response = _map_to_task_detail_response(task)
return StandardResponse(
code=200,
message="Evaluation task created successfully",
data=response
)
    except HTTPException:
        # Re-raise client errors (e.g. duplicate name) instead of mapping them to 500
        await db.rollback()
        raise
    except ValidationError as e:
        await db.rollback()
        logger.error(f"Validation error: {str(e)}")
        raise HTTPException(status_code=400, detail=str(e))
except Exception as e:
await db.rollback()
logger.error(f"Failed to create evaluation task: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/tasks", response_model=StandardResponse[PagedEvaluationTaskResponse])
async def list_evaluation_tasks(
    page: int = Query(1, ge=1, description="Page number, starting from 1"),
    size: int = Query(10, ge=1, le=100, description="Page size"),
    name: Optional[str] = Query(None, description="Fuzzy match on task name"),
    status: Optional[str] = Query(None, description="Filter by task status"),
    task_type: Optional[str] = Query(None, description="Filter by task type"),
db: AsyncSession = Depends(get_db),
):
"""
分页查询评估任务
Args:
page: 页码,从1开始
size: 每页数量
name: 任务名称模糊查询
status: 任务状态过滤
task_type: 任务类型过滤
db: 数据库会话
Returns:
StandardResponse[PagedEvaluationTaskResponse]: 分页的评估任务列表
"""
try:
        # Build query filters
query = select(EvaluationTask)
if name:
query = query.where(EvaluationTask.name.ilike(f"%{name}%"))
if status:
query = query.where(EvaluationTask.status == status)
if task_type:
query = query.where(EvaluationTask.task_type == task_type)
        # Get the total count
count_query = select(func.count()).select_from(query.subquery())
total = (await db.execute(count_query)).scalar_one()
        # Paged query
offset = (page - 1) * size
tasks = (await db.execute(
query.order_by(EvaluationTask.created_at.desc())
.offset(offset)
.limit(size)
)).scalars().all()
        # Convert to response models
items = [_map_to_task_detail_response(task) for task in tasks]
total_pages = (total + size - 1) // size if size > 0 else 0
return StandardResponse(
code=200,
message="Success",
data=PagedEvaluationTaskResponse(
content=items,
totalElements=total,
totalPages=total_pages,
page=page,
size=size,
)
)
except Exception as e:
logger.error(f"Failed to list evaluation tasks: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/tasks/{task_id}/files", response_model=StandardResponse[PagedEvaluationFilesResponse])
async def list_evaluation_items(
task_id: str,
page: int = Query(1, ge=1, description="页码,从1开始"),
size: int = Query(10, ge=1, le=100, description="每页数量"),
db: AsyncSession = Depends(get_db),
):
"""
分页查询评估文件
Args:
task_id: 评估任务ID
page: 页码,从1开始
size: 每页数量
db: 数据库会话
Returns:
StandardResponse[PagedEvaluationFilesResponse]: 分页的评估文件列表
"""
try:
task = await db.get(EvaluationTask, task_id)
if not task:
raise HTTPException(status_code=404, detail="Evaluation task not found")
offset = (page - 1) * size
query = select(EvaluationFile).where(EvaluationFile.task_id == task_id)
count_query = select(func.count()).select_from(query.subquery())
total = (await db.execute(count_query)).scalar_one()
files = (await db.execute(query.offset(offset).limit(size))).scalars().all()
total_pages = (total + size - 1) // size if size > 0 else 0
file_responses = [
EvaluationFileResponse(
taskId=file.task_id,
fileId=file.file_id,
fileName=file.file_name,
totalCount=file.total_count,
evaluatedCount=file.evaluated_count,
pendingCount=file.total_count - file.evaluated_count
)
for file in files
]
return StandardResponse(
code=200,
message="Success",
data=PagedEvaluationFilesResponse(
content=file_responses,
totalElements=total,
totalPages=total_pages,
page=page,
size=size,
)
)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to list evaluation files: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/tasks/{task_id}/items", response_model=StandardResponse[PagedEvaluationItemsResponse])
async def list_evaluation_items(
task_id: str,
    page: int = Query(1, ge=1, description="Page number, starting from 1"),
    size: int = Query(10, ge=1, le=100, description="Page size"),
    status: Optional[str] = Query(None, description="Filter by status"),
    file_id: Optional[str] = Query(None, description="Filter by file"),
db: AsyncSession = Depends(get_db),
):
"""
分页查询评估条目
Args:
task_id: 评估任务ID
page: 页码,从1开始
size: 每页数量
status: 状态过滤
file_id: 文件过滤
db: 数据库会话
Returns:
StandardResponse[PagedEvaluationItemsResponse]: 分页的评估条目列表
"""
try:
        # Check that the task exists
task = await db.get(EvaluationTask, task_id)
if not task:
raise HTTPException(status_code=404, detail="Evaluation task not found")
        # Build query filters
query = select(EvaluationItem).where(EvaluationItem.task_id == task_id)
if status:
query = query.where(EvaluationItem.status == status)
if file_id:
query = query.where(EvaluationItem.file_id == file_id)
        # Get the total count
count_query = select(func.count()).select_from(query.subquery())
total = (await db.execute(count_query)).scalar_one()
        # Paged query
offset = (page - 1) * size
items = (await db.execute(query.offset(offset).limit(size))).scalars().all()
        # Convert to response models
item_responses = [
EvaluationItemResponse(
id=item.id,
taskId=item.task_id,
itemId=item.item_id,
fileId=item.file_id,
                evalContent=json.loads(item.eval_content) if item.eval_content else None,
                evalScore=float(item.eval_score) if item.eval_score is not None else None,
                evalResult=json.loads(item.eval_result) if item.eval_result else None,
status=item.status
)
for item in items
]
total_pages = (total + size - 1) // size if size > 0 else 0
return StandardResponse(
code=200,
message="Success",
data=PagedEvaluationItemsResponse(
content=item_responses,
totalElements=total,
totalPages=total_pages,
page=page,
size=size,
)
)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to list evaluation items: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/tasks/{task_id}", response_model=StandardResponse[EvaluationTaskDetailResponse])
async def get_evaluation_task(
task_id: str,
db: AsyncSession = Depends(get_db),
):
"""
获取评估任务详情
Args:
task_id: 任务ID
db: 数据库会话
Returns:
StandardResponse[EvaluationTaskDetailResponse]: 评估任务详情
"""
try:
task = await db.get(EvaluationTask, task_id)
if not task:
raise HTTPException(status_code=404, detail="Evaluation task not found")
        # Convert to the response model
response = _map_to_task_detail_response(task)
return StandardResponse(
code=200,
message="Success",
data=response
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Failed to get evaluation task: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
@router.delete("/tasks", response_model=StandardResponse[str], status_code=200)
async def delete_eval_tasks(
    ids: list[str] = Query(..., description="IDs of the evaluation tasks to delete"),
db: AsyncSession = Depends(get_db),
):
"""
删除评估任务
Args:
ids: 任务ID
db: 数据库会话
Returns:
StandardResponse[str]: 删除结果
"""
    try:
        for task_id in ids:
            # Check that the task exists
            task = await db.get(EvaluationTask, task_id)
            if not task:
                raise HTTPException(status_code=404, detail=f"Evaluation task {task_id} not found")
            # Delete the evaluation items and file records of this task
            await db.execute(
                EvaluationItem.__table__.delete()
                .where(EvaluationItem.task_id == task_id)
            )
            await db.execute(
                EvaluationFile.__table__.delete()
                .where(EvaluationFile.task_id == task_id)
            )
            # Delete the task itself
            await db.delete(task)
        await db.commit()
return StandardResponse(
code=200,
message="Evaluation task deleted successfully",
data="success"
)
except HTTPException:
await db.rollback()
raise
except Exception as e:
await db.rollback()
logger.error(f"Failed to delete evaluation task: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
def _map_to_task_detail_response(
task: EvaluationTask
) -> EvaluationTaskDetailResponse:
"""将数据库模型转换为任务详情响应模型"""
task_response = EvaluationTaskDetailResponse(
id=task.id,
name=task.name,
description=task.description,
taskType=task.task_type,
sourceType=task.source_type,
sourceId=task.source_id,
sourceName=task.source_name,
status=task.status,
evalProcess=task.eval_process,
evalPrompt=task.eval_prompt,
        evalConfig=json.loads(task.eval_config) if task.eval_config else None,
        createdAt=task.created_at.isoformat() if task.created_at else None,
        updatedAt=task.updated_at.isoformat() if task.updated_at else None,
    )
    task_response.eval_prompt = get_prompt(
        task_response.task_type,
        task_response.eval_config.get("dimensions") if task_response.eval_config else None,
    )
    return task_response
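For reference, a hedged sketch of exercising the create endpoint over HTTP; host, port, and payload values are placeholders, and the router is mounted under /api/evaluation:

import httpx

payload = {
    "name": "demo-eval",
    "taskType": "QA",  # camelCase aliases from the request schema
    "sourceType": "DATASET",
    "sourceId": "11111111-1111-1111-1111-111111111111",
    "sourceName": "demo-dataset",
    "evalConfig": {"modelId": "m-1", "dimensions": []},
}
resp = httpx.post("http://localhost:8000/api/evaluation/tasks", json=payload)
print(resp.status_code, resp.json()["data"]["id"])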


@@ -0,0 +1,101 @@
from typing import List, Optional, Dict, Any
from pydantic import BaseModel, Field, field_validator
from enum import Enum
from app.core.logging import get_logger
from app.module.shared.schema.common import TaskStatus
logger = get_logger(__name__)
class EvaluationConfig(BaseModel):
"""评估配置项"""
model_id: str = Field(..., alias="modelId", description="模型id")
dimensions: list[dict] = Field(..., alias="dimensions", description="评估维度")
class CreateEvaluationTaskRequest(BaseModel):
"""创建评估任务请求"""
name: str = Field(..., description="评估任务名称")
description: Optional[str] = Field(None, description="评估任务描述")
task_type: str = Field(..., alias="taskType", description="评估任务类型:QA/QUALITY/COMPATIBILITY/VALUE")
source_type: str = Field(..., alias="sourceType", description="待评估对象类型:DATASET/SYNTHESIS")
source_id: str = Field(..., alias="sourceId", description="待评估对象ID")
source_name: str = Field(..., alias="sourceName", description="待评估对象名称")
eval_method: str = Field("AUTO", alias="evalMethod", description="评估提示词")
eval_prompt: Optional[str] = Field(None, alias="evalPrompt", description="评估提示词")
eval_config: EvaluationConfig = Field(..., alias="evalConfig", description="评估配置项列表")
class EvaluationTaskItem(BaseModel):
"""评估任务列表项"""
id: str = Field(..., description="任务ID")
name: str = Field(..., description="任务名称")
description: Optional[str] = Field(None, description="任务描述")
task_type: Optional[str] = Field(..., alias="taskType", description="任务类型")
source_type: Optional[str] = Field(..., alias="sourceType", description="数据源类型")
source_id: Optional[str] = Field(..., alias="sourceId", description="数据源ID")
source_name: Optional[str] = Field(None, alias="sourceName", description="数据源名称")
status: TaskStatus = Field(..., description="任务状态")
eval_process: Optional[float] = Field(0, alias="evalProcess", description="评估进度")
created_at: Optional[str] = Field(None, alias="createdAt", description="创建时间")
updated_at: Optional[str] = Field(None, alias="updatedAt", description="更新时间")
class PagedEvaluationTaskResponse(BaseModel):
"""分页评估任务响应"""
content: List[EvaluationTaskItem]
total_elements: int = Field(..., alias="totalElements")
total_pages: int = Field(..., alias="totalPages")
page: int
size: int
class EvaluationTaskDetailResponse(EvaluationTaskItem):
"""评估任务详情响应"""
eval_prompt: Optional[str] = Field(None, alias="evalPrompt", description="评估提示词")
eval_config: Optional[Dict[str, Any]] = Field(None, alias="evalConfig", description="评估配置")
eval_result: Optional[Dict[str, Any]] = Field(None, alias="evalResult", description="评估结果")
class EvaluationItemResponse(BaseModel):
"""评估条目响应"""
id: str = Field(..., description="条目ID")
task_id: str = Field(..., alias="taskId", description="任务ID")
file_id: str = Field(..., alias="fileId", description="文件ID")
item_id: str = Field(..., alias="itemId", description="评估项ID")
eval_content: Optional[Dict[str, Any]] = Field(None, alias="evalContent", description="评估内容")
eval_score: Optional[float] = Field(None, alias="evalScore", description="评估分数")
eval_result: Optional[Dict[str, Any]] = Field(None, alias="evalResult", description="评估结果详情")
status: str = Field(..., description="评估状态")
class EvaluationFileResponse(BaseModel):
"""评估文件响应"""
task_id: str = Field(..., alias="taskId", description="任务ID")
file_id: str = Field(..., alias="fileId", description="文件ID")
file_name: str = Field(..., alias="fileName", description="文件名")
total_count: int = Field(..., alias="totalCount", description="总数")
evaluated_count: int = Field(..., alias="evaluatedCount", description="已评估数")
pending_count: int = Field(..., alias="pendingCount", description="待评估数")
class PagedEvaluationItemsResponse(BaseModel):
"""分页评估任务响应"""
content: List[EvaluationItemResponse]
total_elements: int = Field(..., alias="totalElements")
total_pages: int = Field(..., alias="totalPages")
page: int
size: int
class PagedEvaluationFilesResponse(BaseModel):
"""分页评估任务响应"""
content: List[EvaluationFileResponse]
total_elements: int = Field(..., alias="totalElements")
total_pages: int = Field(..., alias="totalPages")
page: int
size: int
class SourceType(Enum):
DATASET = "DATASET"
SYNTHESIS = "SYNTHESIS"


@@ -0,0 +1,87 @@
EVALUATION_PROMPT_TEMPLATE = [
{
"evalType": "QA",
"defaultDimensions": [
{
"dimension": "问题是否独立",
"description": "仅分析问题,问题的主体和客体都比较明确,即使有省略,也符合语言习惯。在不需要补充其他信息的情况下不会引起疑惑。"
},
{
"dimension": "语法是否错误",
"description": "问题为疑问句,答案为陈述句; 不存在词语搭配不当的情况;连接词和标点符号不存在错用情况;逻辑混乱的情况不存在;语法结构都正确且完整。"
},
{
"dimension": "回答是否有针对性",
"description": "回答应对问题中的所有疑问点提供正面、直接的回答,不应引起疑惑。同时,答案不应有任何内容的遗漏,需构成一个完整的陈述。"
}
],
"prompt": """
# Role: 问答对质量评估专家
## Profile:
- Description: 你是一名专业的对话文本质量评估专家,擅长从多个维度对问答对进行质量评估,为机器学习模型训练提供高质量的数据筛选建议。具备深度学习、自然语言处理和数据科学的专业背景。
## Skills:
1. 能够从多个维度对问答对进行综合评估
2. 擅长识别问答对中的潜在问题,如答案不准确、问题模糊、文本不匹配、逻辑错误等
3. 能够给出具体的改进建议和质量评分,并提供可操作的优化方案
4. 熟悉机器学习训练数据的质量标准和最佳实践
5. 能够区分不同类型的问题(事实性、推理性、创造性)并采用相应的评估标准
## 评估维度:
{dimensions}
## 原始文本块内容:
{content}
## 问题:
{question}
## 答案:
{answer}
## 评估说明:
1. **数据集类型识别**:如果原始文本块内容为空或显示"Distilled Content",说明这是一个蒸馏数据集,没有原始文本参考。请重点评估问题的质量、答案的合理性和逻辑性,以及问答的一致性。
2. **评估原则**:采用严格的评估标准,确保筛选出的数据集能够有效提升模型性能。
## 注意事项:
- 评估结论要具体指出优点和不足,提供可操作的改进建议
- 评估结论控制在150字以内,简洁明了但要涵盖关键信息
## 输出要求:
请按照以下JSON格式输出评估结果,评估结果为Y/N,符合标准输出Y,不符合标准输出N:
{
"result": {{result_example}
},
"evaluation": "这是一个高质量的问答数据集。问题表述清晰具体,答案准确完整且逻辑性强,与原始文本高度相关。建议:可以进一步丰富答案的细节描述。"
}
"""
}
]
def get_dimensions_for_qa(dimensions: list[dict]) -> str:
dimensions_str = "\n"
index = 1
for dimension in dimensions:
dimensions_str += f"### {index}. {dimension.get("dimension")}\n**评估标准:**\n{dimension.get("description")}\n\n"
index += 1
return dimensions_str
def get_result_example_for_qa(dimensions: list[dict]) -> str:
result_example = ""
for dimension in dimensions:
result_example += f'\n "{dimension.get("dimension")}": "Y",'
return result_example
def get_prompt(task_type: str, dimensions: list[dict]) -> str:
template = None
for t in EVALUATION_PROMPT_TEMPLATE:
if t.get("evalType") == task_type:
template = t.get("prompt")
break
if not template:
template = EVALUATION_PROMPT_TEMPLATE[0].get("prompt", "")
if not dimensions or len(dimensions) == 0:
return template
return (template.replace("{dimensions}", get_dimensions_for_qa(dimensions))
.replace("{result_example}", get_result_example_for_qa(dimensions)))


@@ -0,0 +1,29 @@
"""
Schema for evaluation prompt templates.
"""
from typing import List, Dict, Any
from pydantic import BaseModel, Field
class PromptTemplateDimension(BaseModel):
"""A single dimension in the prompt template"""
dimension: str = Field(..., description="Dimension name")
description: str = Field(..., description="Description of the dimension")
class PromptTemplateItem(BaseModel):
"""A single prompt template item"""
evalType: str = Field(..., description="Evaluation type")
defaultDimensions: List[PromptTemplateDimension] = Field(
default_factory=list,
description="List of default dimensions for this evaluation type"
)
prompt: str = Field(..., description="The prompt template string")
class PromptTemplateResponse(BaseModel):
"""Response model for getting prompt templates"""
templates: List[PromptTemplateItem] = Field(
...,
description="List of available prompt templates"
)


@@ -0,0 +1,207 @@
import json
import uuid
import asyncio
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.exception import BusinessErrorCodeEnum, BusinessException
from app.core.logging import get_logger
from app.db.models import EvaluationItem, EvaluationTask, DatasetFiles
from app.db.models.data_evaluation import EvaluationFile
from app.db.models.data_synthesis import DataSynthesisFileInstance, SynthesisData
from app.db.session import AsyncSessionLocal
from app.module.evaluation.schema.evaluation import SourceType
from app.module.shared.schema import TaskStatus
from app.module.shared.util.model_chat import call_openai_style_model
from app.module.evaluation.schema.prompt import get_prompt
from app.module.shared.util.structured_file import StructuredFileHandlerFactory
from app.module.system.service.common_service import get_model_by_id
logger = get_logger(__name__)
class EvaluationExecutor:
def __init__(self, db: AsyncSession, task: EvaluationTask):
self.db = db
self.task = task
async def save_eval_items(self):
pass
    def get_eval_prompt(self, item: EvaluationItem) -> str:
        prompt_text = get_prompt(self.task.task_type, json.loads(self.task.eval_config).get("dimensions"))
        eval_content = json.loads(item.eval_content)
        if self.task.task_type == "QA":
            # "input" is optional in the Alpaca schema, so fall back to an empty string
            prompt_text = (prompt_text.replace("{content}", eval_content.get("input") or "")
                           .replace("{question}", eval_content.get("instruction") or "")
                           .replace("{answer}", eval_content.get("output") or ""))
        return prompt_text
async def execute(self):
eval_config = json.loads(self.task.eval_config)
model_config = await get_model_by_id(self.db, eval_config.get("model_id"))
semaphore = asyncio.Semaphore(10)
files = (await self.db.execute(
select(EvaluationFile).where(EvaluationFile.task_id == self.task.id)
)).scalars().all()
for file in files:
items = (await self.db.execute(
select(EvaluationItem).where(EvaluationItem.task_id == self.task.id)
.where(EvaluationItem.file_id == file.file_id)
)).scalars().all()
tasks = [
self.evaluate_item(model_config, item, semaphore)
for item in items
]
            await asyncio.gather(*tasks, return_exceptions=True)
            # Only count items that were actually evaluated; failed calls stay PENDING
            file.evaluated_count = sum(
                1 for i in items if i.status == TaskStatus.COMPLETED.value
            )
            await self.db.commit()
    async def evaluate_item(self, model_config, item: EvaluationItem, semaphore: asyncio.Semaphore):
        async with semaphore:
            prompt_text = self.get_eval_prompt(item)
            resp_text = await asyncio.to_thread(
                call_openai_style_model, model_config.base_url, model_config.api_key, model_config.model_name,
                prompt_text,
            )
            item.eval_result = resp_text
            item.status = TaskStatus.COMPLETED.value
            # No commit here: the shared AsyncSession is not safe for concurrent
            # commits; execute() commits once per file after the gather completes.
def get_source_type(self) -> SourceType:
pass
class DatasetEvaluationExecutor(EvaluationExecutor):
def __init__(self, db: AsyncSession, task: EvaluationTask):
super().__init__(db, task)
async def save_eval_items(self):
dataset_files = ((await self.db.execute(select(DatasetFiles)
.where(DatasetFiles.dataset_id == self.task.source_id)))
.scalars().all())
handler = StructuredFileHandlerFactory().get_handler(self.task.task_type)
for dataset_file in dataset_files:
if dataset_file.file_type.upper() != "JSON" and dataset_file.file_type.upper() != "JSONL":
continue
items = handler.get_items_from_file(dataset_file.file_path)
logger.info(f"parse {len(items)} items from file {dataset_file.file_name}")
for item in items:
self.db.add(EvaluationItem(
id=str(uuid.uuid4()),
task_id=self.task.id,
file_id=dataset_file.id,
item_id=item.get("id") if item.get("id") else str(uuid.uuid4()),
eval_content=json.dumps(item, ensure_ascii=False),
status=TaskStatus.PENDING.value,
created_by=self.task.created_by,
updated_by=self.task.updated_by,
))
self.db.add(EvaluationFile(
id=str(uuid.uuid4()),
task_id=self.task.id,
file_id=dataset_file.id,
file_name=dataset_file.file_name,
total_count=len(items),
evaluated_count=0,
created_by=self.task.created_by,
updated_by=self.task.updated_by,
))
def get_source_type(self) -> SourceType:
return SourceType.DATASET
class SynthesisEvaluationExecutor(EvaluationExecutor):
def __init__(self, db: AsyncSession, task: EvaluationTask):
super().__init__(db, task)
async def save_eval_items(self):
synthesis_files = ((await self.db.execute(select(DataSynthesisFileInstance)
.where(DataSynthesisFileInstance.task_id == self.task.source_id)))
.scalars().all())
for synthesis_file in synthesis_files:
synthesis_datas = ((await self.db.execute(select(SynthesisData)
.where(SynthesisData.synthesis_file_instance_id == synthesis_file.id)))
.scalars().all())
logger.info(f"get {len(synthesis_datas)} items from file {synthesis_file.file_name}")
for synthesis_data in synthesis_datas:
self.db.add(EvaluationItem(
id=str(uuid.uuid4()),
task_id=self.task.id,
file_id=synthesis_file.id,
item_id=synthesis_data.id,
eval_content=synthesis_data.data,
status=TaskStatus.PENDING.value,
created_by=self.task.created_by,
updated_by=self.task.updated_by,
))
self.db.add(EvaluationFile(
id=str(uuid.uuid4()),
task_id=self.task.id,
file_id=synthesis_file.id,
file_name=synthesis_file.file_name,
total_count=len(synthesis_datas),
evaluated_count=0,
created_by=self.task.created_by,
updated_by=self.task.updated_by,
))
def get_source_type(self) -> SourceType:
return SourceType.SYNTHESIS
class EvaluationExecutorFactory:
def __init__(self, db: AsyncSession, task: EvaluationTask):
self.db = db
self.executors: list[EvaluationExecutor] = []
self.executors.append(DatasetEvaluationExecutor(db, task))
self.executors.append(SynthesisEvaluationExecutor(db, task))
def get_executor(self, source_type: str) -> EvaluationExecutor:
for executor in self.executors:
if executor.get_source_type().value == source_type:
return executor
raise BusinessException(BusinessErrorCodeEnum.TASK_TYPE_ERROR.value)
class EvaluationTaskService:
@staticmethod
async def run_evaluation_task(task_id: str):
"""
Background worker to run evaluations.
- task_id: id of EvaluationTaskModel
"""
logger.info(f"Background evaluation worker started add items for task {task_id}")
        async with AsyncSessionLocal() as session:
            task = None
            try:
                task = (await session.execute(
                    select(EvaluationTask).where(EvaluationTask.id == task_id)
                )).scalar_one_or_none()
                if not task:
                    logger.error(f"Evaluation task {task_id} not found, aborting background worker")
                    return
                factory = EvaluationExecutorFactory(session, task)
                executor = factory.get_executor(task.source_type)
                await executor.save_eval_items()
                task.status = TaskStatus.RUNNING.value
            except Exception as e:
                logger.error(f"Background worker encountered error for task {task_id}: {e}")
                if task:
                    task.status = TaskStatus.FAILED.value
                return
            finally:
                await session.commit()
logger.info(f"Background evaluation worker started for task {task_id}")
        async with AsyncSessionLocal() as session:
            task = None
            try:
                task = (await session.execute(
                    select(EvaluationTask).where(EvaluationTask.id == task_id)
                )).scalar_one_or_none()
                if not task:
                    logger.error(f"Evaluation task {task_id} not found, aborting background worker")
                    return
                factory = EvaluationExecutorFactory(session, task)
                executor = factory.get_executor(task.source_type)
                await executor.execute()
                logger.info(f"Background evaluation worker finished for task {task_id}")
                task.status = TaskStatus.COMPLETED.value
            except Exception as e:
                logger.error(f"Background worker encountered error for task {task_id}: {e}")
                if task:
                    task.status = TaskStatus.FAILED.value
            finally:
                await session.commit()


@@ -0,0 +1,45 @@
"""
Service for managing evaluation prompt templates.
"""
from typing import List, Dict, Any
from app.module.evaluation.schema.prompt import EVALUATION_PROMPT_TEMPLATE
from app.module.evaluation.schema.prompt_template import (
PromptTemplateItem,
PromptTemplateDimension,
PromptTemplateResponse
)
class PromptTemplateService:
"""Service for managing evaluation prompt templates"""
@staticmethod
def get_prompt_templates() -> PromptTemplateResponse:
"""
Get all available prompt templates
Returns:
PromptTemplateResponse containing all prompt templates
"""
templates = []
for template in EVALUATION_PROMPT_TEMPLATE:
# Convert dimensions to the proper schema
dimensions = [
PromptTemplateDimension(
dimension=dim.get("dimension"),
description=dim.get("description", "")
)
for dim in template.get("defaultDimensions", [])
]
# Create template item
template_item = PromptTemplateItem(
evalType=template.get("evalType", ""),
defaultDimensions=dimensions,
prompt=template.get("prompt", "")
)
templates.append(template_item)
return PromptTemplateResponse(templates=templates)


@@ -13,7 +13,7 @@ from app.db.models import Dataset
from app.db.session import get_db
from app.module.dataset import DatasetManagementService
from app.module.shared.schema import StandardResponse, TaskStatus
-from app.module.synthesis.schema.ratio_task import (
+from app.module.ratio.schema.ratio_task import (
CreateRatioTaskResponse,
CreateRatioTaskRequest,
PagedRatioTaskResponse,
@@ -21,7 +21,7 @@ from app.module.synthesis.schema.ratio_task import (
TargetDatasetInfo,
RatioTaskDetailResponse,
)
-from app.module.synthesis.service.ratio_task import RatioTaskService
+from app.module.ratio.service.ratio_task import RatioTaskService
from app.db.models.ratio_task import RatioInstance, RatioRelation, RatioRelation as RatioRelationModel
router = APIRouter(


@@ -7,9 +7,13 @@ from app.module.shared.schema.common import TaskStatus
logger = get_logger(__name__)
+class LabelFilter(BaseModel):
+    label: Optional[str] = Field(..., description="Label")
+    value: Optional[str] = Field(None, description="Label value")
class FilterCondition(BaseModel):
    date_range: Optional[str] = Field(None, description="Date range", alias="dateRange")
-    label: Optional[str] = Field(None, description="Label")
+    label: Optional[LabelFilter] = Field(None, description="Label filter")
@field_validator("date_range")
@classmethod


@@ -7,7 +7,6 @@ import shutil
import asyncio
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.logging import get_logger
@@ -16,7 +15,7 @@ from app.db.models import Dataset, DatasetFiles
from app.db.session import AsyncSessionLocal
from app.module.dataset.schema.dataset_file import DatasetFileTag
from app.module.shared.schema import TaskStatus
-from app.module.synthesis.schema.ratio_task import FilterCondition
+from app.module.ratio.schema.ratio_task import FilterCondition
logger = get_logger(__name__)
@@ -59,7 +58,10 @@ class RatioTaskService:
counts=int(item.get("counts", 0)),
filter_conditions=json.dumps({
                'date_range': item.get("filter_conditions").date_range,
-                'label': item.get("filter_conditions").label,
+                'label': {
+                    "label": item.get("filter_conditions").label.label,
+                    "value": item.get("filter_conditions").label.value,
+                },
})
)
logger.info(f"Relation created: {relation.id}, {relation}, {item}, {config}")
@@ -285,7 +287,7 @@ class RatioTaskService:
try:
# tags could be a list of strings or list of objects with 'name'
tag_names = RatioTaskService.get_all_tags(tags)
-            return conditions.label in tag_names
+            return f"{conditions.label.label}@{conditions.label.value}" in tag_names
except Exception as e:
logger.exception(f"Failed to get tags for {file}", e)
return False


@@ -0,0 +1,15 @@
from openai import OpenAI
def call_openai_style_model(base_url, api_key, model_name, prompt, **kwargs):
    """Send a single-turn prompt to an OpenAI-compatible endpoint and return the reply text."""
    client = OpenAI(
        base_url=base_url,
        api_key=api_key
    )
response = client.chat.completions.create(
model=model_name,
messages=[{"role": "user", "content": prompt}],
**kwargs
)
return response.choices[0].message.content
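A hedged usage sketch for the helper; endpoint, key, and model name are placeholders for any OpenAI-compatible service:

reply = call_openai_style_model(
    base_url="http://localhost:8000/v1",
    api_key="sk-placeholder",
    model_name="qwen2.5-7b-instruct",
    prompt="Reply with the single word: pong",
    temperature=0,  # extra kwargs are passed straight to chat.completions.create
)
print(reply)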


@@ -0,0 +1,85 @@
import json
from enum import Enum
from jsonschema import validate
class ItemTypes(Enum):
QA = "QA"
class StructuredFileItemHandler:
def __init__(self):
pass
def get_item_type(self) -> ItemTypes:
pass
def get_items_from_file(self, file_path: str) -> list[dict]:
pass
def check_file(self) -> bool:
pass
class QAItemHandler(StructuredFileItemHandler):
def __init__(self):
self.schema_alpaca = {
"type": "object",
"properties": {
"instruction": {"type": "string"},
"input": {"type": "string"},
"output": {"type": "string"}
},
"required": ["instruction", "output"],
}
self.schema_alpaca_list = {
"type": "array",
"items": self.schema_alpaca,
}
super().__init__()
def get_item_type(self):
return ItemTypes.QA
    def validate_json(self, data):
        try:
            validate(instance=data, schema=self.schema_alpaca)
            return True
        except Exception:
            try:
                validate(instance=data, schema=self.schema_alpaca_list)
                return True
            except Exception:
                return False
    def get_items_from_file(self, file_path: str) -> list[dict]:
        file_type = file_path.split(".")[-1].upper()
        items = []
        if file_type == "JSON":
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            if not self.validate_json(data):
                return items
            # A single Alpaca object is also valid; normalize it to a list
            items = data if isinstance(data, list) else [data]
        elif file_type == "JSONL":
            with open(file_path, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        data = json.loads(line)
                    except json.JSONDecodeError:
                        # Skip lines that are not valid JSON instead of aborting the file
                        continue
                    if not self.validate_json(data):
                        continue
                    items.append(data)
        return items
def check_file(self) -> bool:
pass
class StructuredFileHandlerFactory:
def __init__(self):
self.handlers: list[StructuredFileItemHandler] = []
self.handlers.append(QAItemHandler())
def get_handler(self, item_type: str) -> StructuredFileItemHandler:
for handler in self.handlers:
if handler.get_item_type().value == item_type:
return handler
raise ValueError(f"Unsupported item type: {item_type}")


@@ -1260,6 +1260,43 @@ files = [
{file = "jsonpointer-3.0.0.tar.gz", hash = "sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef"},
]
[[package]]
name = "jsonschema"
version = "4.25.1"
description = "An implementation of JSON Schema validation for Python"
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63"},
{file = "jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85"},
]
[package.dependencies]
attrs = ">=22.2.0"
jsonschema-specifications = ">=2023.03.6"
referencing = ">=0.28.4"
rpds-py = ">=0.7.1"
[package.extras]
format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"]
format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "rfc3987-syntax (>=1.1.0)", "uri-template", "webcolors (>=24.6.0)"]
[[package]]
name = "jsonschema-specifications"
version = "2025.9.1"
description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry"
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe"},
{file = "jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d"},
]
[package.dependencies]
referencing = ">=0.31.0"
[[package]]
name = "langchain"
version = "1.1.0"
@@ -2864,6 +2901,23 @@ files = [
[package.extras]
all = ["numpy"]
[[package]]
name = "referencing"
version = "0.37.0"
description = "JSON Referencing + Python"
optional = false
python-versions = ">=3.10"
groups = ["main"]
files = [
{file = "referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231"},
{file = "referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8"},
]
[package.dependencies]
attrs = ">=22.2.0"
rpds-py = ">=0.7.0"
typing-extensions = {version = ">=4.4.0", markers = "python_version < \"3.13\""}
[[package]]
name = "regex"
version = "2025.11.3"
@@ -3026,6 +3080,131 @@ files = [
[package.dependencies]
requests = ">=2.0.1,<3.0.0"
[[package]]
name = "rpds-py"
version = "0.30.0"
description = "Python bindings to Rust's persistent data structures (rpds)"
optional = false
python-versions = ">=3.10"
groups = ["main"]
files = [
{file = "rpds_py-0.30.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:679ae98e00c0e8d68a7fda324e16b90fd5260945b45d3b824c892cec9eea3288"},
{file = "rpds_py-0.30.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4cc2206b76b4f576934f0ed374b10d7ca5f457858b157ca52064bdfc26b9fc00"},
{file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:389a2d49eded1896c3d48b0136ead37c48e221b391c052fba3f4055c367f60a6"},
{file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:32c8528634e1bf7121f3de08fa85b138f4e0dc47657866630611b03967f041d7"},
{file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f207f69853edd6f6700b86efb84999651baf3789e78a466431df1331608e5324"},
{file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:67b02ec25ba7a9e8fa74c63b6ca44cf5707f2fbfadae3ee8e7494297d56aa9df"},
{file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c0e95f6819a19965ff420f65578bacb0b00f251fefe2c8b23347c37174271f3"},
{file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:a452763cc5198f2f98898eb98f7569649fe5da666c2dc6b5ddb10fde5a574221"},
{file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e0b65193a413ccc930671c55153a03ee57cecb49e6227204b04fae512eb657a7"},
{file = "rpds_py-0.30.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:858738e9c32147f78b3ac24dc0edb6610000e56dc0f700fd5f651d0a0f0eb9ff"},
{file = "rpds_py-0.30.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:da279aa314f00acbb803da1e76fa18666778e8a8f83484fba94526da5de2cba7"},
{file = "rpds_py-0.30.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7c64d38fb49b6cdeda16ab49e35fe0da2e1e9b34bc38bd78386530f218b37139"},
{file = "rpds_py-0.30.0-cp310-cp310-win32.whl", hash = "sha256:6de2a32a1665b93233cde140ff8b3467bdb9e2af2b91079f0333a0974d12d464"},
{file = "rpds_py-0.30.0-cp310-cp310-win_amd64.whl", hash = "sha256:1726859cd0de969f88dc8673bdd954185b9104e05806be64bcd87badbe313169"},
{file = "rpds_py-0.30.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a2bffea6a4ca9f01b3f8e548302470306689684e61602aa3d141e34da06cf425"},
{file = "rpds_py-0.30.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dc4f992dfe1e2bc3ebc7444f6c7051b4bc13cd8e33e43511e8ffd13bf407010d"},
{file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:422c3cb9856d80b09d30d2eb255d0754b23e090034e1deb4083f8004bd0761e4"},
{file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:07ae8a593e1c3c6b82ca3292efbe73c30b61332fd612e05abee07c79359f292f"},
{file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12f90dd7557b6bd57f40abe7747e81e0c0b119bef015ea7726e69fe550e394a4"},
{file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:99b47d6ad9a6da00bec6aabe5a6279ecd3c06a329d4aa4771034a21e335c3a97"},
{file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33f559f3104504506a44bb666b93a33f5d33133765b0c216a5bf2f1e1503af89"},
{file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:946fe926af6e44f3697abbc305ea168c2c31d3e3ef1058cf68f379bf0335a78d"},
{file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:495aeca4b93d465efde585977365187149e75383ad2684f81519f504f5c13038"},
{file = "rpds_py-0.30.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9a0ca5da0386dee0655b4ccdf46119df60e0f10da268d04fe7cc87886872ba7"},
{file = "rpds_py-0.30.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8d6d1cc13664ec13c1b84241204ff3b12f9bb82464b8ad6e7a5d3486975c2eed"},
{file = "rpds_py-0.30.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3896fa1be39912cf0757753826bc8bdc8ca331a28a7c4ae46b7a21280b06bb85"},
{file = "rpds_py-0.30.0-cp311-cp311-win32.whl", hash = "sha256:55f66022632205940f1827effeff17c4fa7ae1953d2b74a8581baaefb7d16f8c"},
{file = "rpds_py-0.30.0-cp311-cp311-win_amd64.whl", hash = "sha256:a51033ff701fca756439d641c0ad09a41d9242fa69121c7d8769604a0a629825"},
{file = "rpds_py-0.30.0-cp311-cp311-win_arm64.whl", hash = "sha256:47b0ef6231c58f506ef0b74d44e330405caa8428e770fec25329ed2cb971a229"},
{file = "rpds_py-0.30.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a161f20d9a43006833cd7068375a94d035714d73a172b681d8881820600abfad"},
{file = "rpds_py-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6abc8880d9d036ecaafe709079969f56e876fcf107f7a8e9920ba6d5a3878d05"},
{file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca28829ae5f5d569bb62a79512c842a03a12576375d5ece7d2cadf8abe96ec28"},
{file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1010ed9524c73b94d15919ca4d41d8780980e1765babf85f9a2f90d247153dd"},
{file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8d1736cfb49381ba528cd5baa46f82fdc65c06e843dab24dd70b63d09121b3f"},
{file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d948b135c4693daff7bc2dcfc4ec57237a29bd37e60c2fabf5aff2bbacf3e2f1"},
{file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47f236970bccb2233267d89173d3ad2703cd36a0e2a6e92d0560d333871a3d23"},
{file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:2e6ecb5a5bcacf59c3f912155044479af1d0b6681280048b338b28e364aca1f6"},
{file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a8fa71a2e078c527c3e9dc9fc5a98c9db40bcc8a92b4e8858e36d329f8684b51"},
{file = "rpds_py-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73c67f2db7bc334e518d097c6d1e6fed021bbc9b7d678d6cc433478365d1d5f5"},
{file = "rpds_py-0.30.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5ba103fb455be00f3b1c2076c9d4264bfcb037c976167a6047ed82f23153f02e"},
{file = "rpds_py-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7cee9c752c0364588353e627da8a7e808a66873672bcb5f52890c33fd965b394"},
{file = "rpds_py-0.30.0-cp312-cp312-win32.whl", hash = "sha256:1ab5b83dbcf55acc8b08fc62b796ef672c457b17dbd7820a11d6c52c06839bdf"},
{file = "rpds_py-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:a090322ca841abd453d43456ac34db46e8b05fd9b3b4ac0c78bcde8b089f959b"},
{file = "rpds_py-0.30.0-cp312-cp312-win_arm64.whl", hash = "sha256:669b1805bd639dd2989b281be2cfd951c6121b65e729d9b843e9639ef1fd555e"},
{file = "rpds_py-0.30.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:f83424d738204d9770830d35290ff3273fbb02b41f919870479fab14b9d303b2"},
{file = "rpds_py-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e7536cd91353c5273434b4e003cbda89034d67e7710eab8761fd918ec6c69cf8"},
{file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2771c6c15973347f50fece41fc447c054b7ac2ae0502388ce3b6738cd366e3d4"},
{file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0a59119fc6e3f460315fe9d08149f8102aa322299deaa5cab5b40092345c2136"},
{file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76fec018282b4ead0364022e3c54b60bf368b9d926877957a8624b58419169b7"},
{file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:692bef75a5525db97318e8cd061542b5a79812d711ea03dbc1f6f8dbb0c5f0d2"},
{file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9027da1ce107104c50c81383cae773ef5c24d296dd11c99e2629dbd7967a20c6"},
{file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:9cf69cdda1f5968a30a359aba2f7f9aa648a9ce4b580d6826437f2b291cfc86e"},
{file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a4796a717bf12b9da9d3ad002519a86063dcac8988b030e405704ef7d74d2d9d"},
{file = "rpds_py-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5d4c2aa7c50ad4728a094ebd5eb46c452e9cb7edbfdb18f9e1221f597a73e1e7"},
{file = "rpds_py-0.30.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba81a9203d07805435eb06f536d95a266c21e5b2dfbf6517748ca40c98d19e31"},
{file = "rpds_py-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:945dccface01af02675628334f7cf49c2af4c1c904748efc5cf7bbdf0b579f95"},
{file = "rpds_py-0.30.0-cp313-cp313-win32.whl", hash = "sha256:b40fb160a2db369a194cb27943582b38f79fc4887291417685f3ad693c5a1d5d"},
{file = "rpds_py-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:806f36b1b605e2d6a72716f321f20036b9489d29c51c91f4dd29a3e3afb73b15"},
{file = "rpds_py-0.30.0-cp313-cp313-win_arm64.whl", hash = "sha256:d96c2086587c7c30d44f31f42eae4eac89b60dabbac18c7669be3700f13c3ce1"},
{file = "rpds_py-0.30.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:eb0b93f2e5c2189ee831ee43f156ed34e2a89a78a66b98cadad955972548be5a"},
{file = "rpds_py-0.30.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:922e10f31f303c7c920da8981051ff6d8c1a56207dbdf330d9047f6d30b70e5e"},
{file = "rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdc62c8286ba9bf7f47befdcea13ea0e26bf294bda99758fd90535cbaf408000"},
{file = "rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47f9a91efc418b54fb8190a6b4aa7813a23fb79c51f4bb84e418f5476c38b8db"},
{file = "rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f3587eb9b17f3789ad50824084fa6f81921bbf9a795826570bda82cb3ed91f2"},
{file = "rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39c02563fc592411c2c61d26b6c5fe1e51eaa44a75aa2c8735ca88b0d9599daa"},
{file = "rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51a1234d8febafdfd33a42d97da7a43f5dcb120c1060e352a3fbc0c6d36e2083"},
{file = "rpds_py-0.30.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:eb2c4071ab598733724c08221091e8d80e89064cd472819285a9ab0f24bcedb9"},
{file = "rpds_py-0.30.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6bdfdb946967d816e6adf9a3d8201bfad269c67efe6cefd7093ef959683c8de0"},
{file = "rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c77afbd5f5250bf27bf516c7c4a016813eb2d3e116139aed0096940c5982da94"},
{file = "rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:61046904275472a76c8c90c9ccee9013d70a6d0f73eecefd38c1ae7c39045a08"},
{file = "rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c5f36a861bc4b7da6516dbdf302c55313afa09b81931e8280361a4f6c9a2d27"},
{file = "rpds_py-0.30.0-cp313-cp313t-win32.whl", hash = "sha256:3d4a69de7a3e50ffc214ae16d79d8fbb0922972da0356dcf4d0fdca2878559c6"},
{file = "rpds_py-0.30.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f14fc5df50a716f7ece6a80b6c78bb35ea2ca47c499e422aa4463455dd96d56d"},
{file = "rpds_py-0.30.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:68f19c879420aa08f61203801423f6cd5ac5f0ac4ac82a2368a9fcd6a9a075e0"},
{file = "rpds_py-0.30.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ec7c4490c672c1a0389d319b3a9cfcd098dcdc4783991553c332a15acf7249be"},
{file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f251c812357a3fed308d684a5079ddfb9d933860fc6de89f2b7ab00da481e65f"},
{file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac98b175585ecf4c0348fd7b29c3864bda53b805c773cbf7bfdaffc8070c976f"},
{file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3e62880792319dbeb7eb866547f2e35973289e7d5696c6e295476448f5b63c87"},
{file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e7fc54e0900ab35d041b0601431b0a0eb495f0851a0639b6ef90f7741b39a18"},
{file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47e77dc9822d3ad616c3d5759ea5631a75e5809d5a28707744ef79d7a1bcfcad"},
{file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b4dc1a6ff022ff85ecafef7979a2c6eb423430e05f1165d6688234e62ba99a07"},
{file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4559c972db3a360808309e06a74628b95eaccbf961c335c8fe0d590cf587456f"},
{file = "rpds_py-0.30.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0ed177ed9bded28f8deb6ab40c183cd1192aa0de40c12f38be4d59cd33cb5c65"},
{file = "rpds_py-0.30.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ad1fa8db769b76ea911cb4e10f049d80bf518c104f15b3edb2371cc65375c46f"},
{file = "rpds_py-0.30.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:46e83c697b1f1c72b50e5ee5adb4353eef7406fb3f2043d64c33f20ad1c2fc53"},
{file = "rpds_py-0.30.0-cp314-cp314-win32.whl", hash = "sha256:ee454b2a007d57363c2dfd5b6ca4a5d7e2c518938f8ed3b706e37e5d470801ed"},
{file = "rpds_py-0.30.0-cp314-cp314-win_amd64.whl", hash = "sha256:95f0802447ac2d10bcc69f6dc28fe95fdf17940367b21d34e34c737870758950"},
{file = "rpds_py-0.30.0-cp314-cp314-win_arm64.whl", hash = "sha256:613aa4771c99f03346e54c3f038e4cc574ac09a3ddfb0e8878487335e96dead6"},
{file = "rpds_py-0.30.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:7e6ecfcb62edfd632e56983964e6884851786443739dbfe3582947e87274f7cb"},
{file = "rpds_py-0.30.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a1d0bc22a7cdc173fedebb73ef81e07faef93692b8c1ad3733b67e31e1b6e1b8"},
{file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d08f00679177226c4cb8c5265012eea897c8ca3b93f429e546600c971bcbae7"},
{file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5965af57d5848192c13534f90f9dd16464f3c37aaf166cc1da1cae1fd5a34898"},
{file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a4e86e34e9ab6b667c27f3211ca48f73dba7cd3d90f8d5b11be56e5dbc3fb4e"},
{file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d3e6b26f2c785d65cc25ef1e5267ccbe1b069c5c21b8cc724efee290554419"},
{file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:626a7433c34566535b6e56a1b39a7b17ba961e97ce3b80ec62e6f1312c025551"},
{file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:acd7eb3f4471577b9b5a41baf02a978e8bdeb08b4b355273994f8b87032000a8"},
{file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fe5fa731a1fa8a0a56b0977413f8cacac1768dad38d16b3a296712709476fbd5"},
{file = "rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:74a3243a411126362712ee1524dfc90c650a503502f135d54d1b352bd01f2404"},
{file = "rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:3e8eeb0544f2eb0d2581774be4c3410356eba189529a6b3e36bbbf9696175856"},
{file = "rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:dbd936cde57abfee19ab3213cf9c26be06d60750e60a8e4dd85d1ab12c8b1f40"},
{file = "rpds_py-0.30.0-cp314-cp314t-win32.whl", hash = "sha256:dc824125c72246d924f7f796b4f63c1e9dc810c7d9e2355864b3c3a73d59ade0"},
{file = "rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3"},
{file = "rpds_py-0.30.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c2262bdba0ad4fc6fb5545660673925c2d2a5d9e2e0fb603aad545427be0fc58"},
{file = "rpds_py-0.30.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:ee6af14263f25eedc3bb918a3c04245106a42dfd4f5c2285ea6f997b1fc3f89a"},
{file = "rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3adbb8179ce342d235c31ab8ec511e66c73faa27a47e076ccc92421add53e2bb"},
{file = "rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:250fa00e9543ac9b97ac258bd37367ff5256666122c2d0f2bc97577c60a1818c"},
{file = "rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9854cf4f488b3d57b9aaeb105f06d78e5529d3145b1e4a41750167e8c213c6d3"},
{file = "rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:993914b8e560023bc0a8bf742c5f303551992dcb85e247b1e5c7f4a7d145bda5"},
{file = "rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58edca431fb9b29950807e301826586e5bbf24163677732429770a697ffe6738"},
{file = "rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:dea5b552272a944763b34394d04577cf0f9bd013207bc32323b5a89a53cf9c2f"},
{file = "rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ba3af48635eb83d03f6c9735dfb21785303e73d22ad03d489e88adae6eab8877"},
{file = "rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:dff13836529b921e22f15cb099751209a60009731a68519630a24d61f0b1b30a"},
{file = "rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:1b151685b23929ab7beec71080a8889d4d6d9fa9a983d213f07121205d48e2c4"},
{file = "rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e"},
{file = "rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84"},
]
[[package]]
name = "six"
version = "1.17.0"
@@ -4261,4 +4440,4 @@ cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and pyt
[metadata]
lock-version = "2.1"
python-versions = ">=3.12,<4.0.0"
content-hash = "9c64ec78daa702d8344683faba327cb686b7fa79cf9bb12006c3dcf55dae789d"
content-hash = "26fa6096f5efdd91a8d5b17a2f5efd68cd5f26e4dcdd516a39b77708c36b45e8"


@@ -27,6 +27,8 @@ dependencies = [
"langchain (>=1.1.0,<2.0.0)",
"langchain-community (>=0.4.1,<0.5.0)",
"langchain-openai (>=1.1.0,<2.0.0)",
"openai (>=2.8.1,<3.0.0)",
"jsonschema (>=4.25.1,<5.0.0)",
]