You've already forked DataMate
feat: enhance backend deployment, frontend file selection and synthesis task management (#129)
* feat: Implement data synthesis task management with database models and API endpoints * feat: Update Python version requirements and refine dependency constraints in configuration * fix: Correctly extract file values from selectedFilesMap in AddDataDialog * feat: Refactor synthesis task routes and enhance file task management in the API * feat: Enhance SynthesisTaskTab with tooltip actions and add chunk data retrieval in API
This commit is contained in:
@@ -5,12 +5,14 @@ on:
|
||||
branches: [ "main" ]
|
||||
paths:
|
||||
- 'scripts/images/datamate-python/**'
|
||||
- 'runtime/datamate-python/**'
|
||||
- '.github/workflows/docker-image-backend-python.yml'
|
||||
- '.github/workflows/docker-images-reusable.yml'
|
||||
pull_request:
|
||||
branches: [ "main" ]
|
||||
paths:
|
||||
- 'scripts/images/datamate-python/**'
|
||||
- 'runtime/datamate-python/**'
|
||||
- '.github/workflows/docker-image-backend-python.yml'
|
||||
- '.github/workflows/docker-images-reusable.yml'
|
||||
workflow_dispatch:
|
||||
|
||||
@@ -10,6 +10,7 @@ services:
|
||||
volumes:
|
||||
- ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd
|
||||
command: etcd -advertise-client-urls=http://etcd:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
|
||||
restart: always
|
||||
networks:
|
||||
- datamate
|
||||
healthcheck:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import React, { useEffect } from "react";
|
||||
import React, { useCallback, useEffect } from "react";
|
||||
import { Button, Input, Table } from "antd";
|
||||
import { RightOutlined } from "@ant-design/icons";
|
||||
import { mapDataset } from "@/pages/DataManagement/dataset.const";
|
||||
@@ -19,6 +19,7 @@ interface DatasetFileTransferProps
|
||||
open: boolean;
|
||||
selectedFilesMap: { [key: string]: DatasetFile };
|
||||
onSelectedFilesChange: (filesMap: { [key: string]: DatasetFile }) => void;
|
||||
onDatasetSelect?: (dataset: Dataset | null) => void;
|
||||
}
|
||||
|
||||
const fileCols = [
|
||||
@@ -48,6 +49,7 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
|
||||
open,
|
||||
selectedFilesMap,
|
||||
onSelectedFilesChange,
|
||||
onDatasetSelect,
|
||||
...props
|
||||
}) => {
|
||||
const [datasets, setDatasets] = React.useState<Dataset[]>([]);
|
||||
@@ -96,7 +98,7 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
|
||||
300
|
||||
);
|
||||
|
||||
const fetchFiles = async () => {
|
||||
const fetchFiles = useCallback(async () => {
|
||||
if (!selectedDataset) return;
|
||||
const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
|
||||
page: filesPagination.current - 1,
|
||||
@@ -104,23 +106,25 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
|
||||
keyword: filesSearch,
|
||||
});
|
||||
setFiles(
|
||||
data.content.map((item) => ({
|
||||
(data.content || []).map((item: DatasetFile) => ({
|
||||
...item,
|
||||
key: item.id,
|
||||
datasetName: selectedDataset.name,
|
||||
})) || []
|
||||
}))
|
||||
);
|
||||
setFilesPagination((prev) => ({
|
||||
...prev,
|
||||
total: data.totalElements,
|
||||
}));
|
||||
};
|
||||
}, [filesPagination.current, filesPagination.pageSize, filesSearch, selectedDataset]);
|
||||
|
||||
useEffect(() => {
|
||||
if (selectedDataset) {
|
||||
fetchFiles();
|
||||
}
|
||||
}, [selectedDataset]);
|
||||
fetchFiles().catch(() => {});
|
||||
}, [fetchFiles]);
|
||||
|
||||
useEffect(() => {
|
||||
onDatasetSelect?.(selectedDataset);
|
||||
}, [selectedDataset, onDatasetSelect]);
|
||||
|
||||
const toggleSelectFile = (record: DatasetFile) => {
|
||||
if (!selectedFilesMap[record.id]) {
|
||||
@@ -147,8 +151,9 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
|
||||
setShowFiles(false);
|
||||
setSelectedDataset(null);
|
||||
setDatasetSelections([]);
|
||||
onDatasetSelect?.(null);
|
||||
}
|
||||
}, [open]);
|
||||
}, [open, onDatasetSelect]);
|
||||
|
||||
const datasetCols = [
|
||||
{
|
||||
@@ -206,7 +211,15 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
|
||||
})}
|
||||
dataSource={datasets}
|
||||
columns={datasetCols}
|
||||
pagination={datasetPagination}
|
||||
pagination={{
|
||||
...datasetPagination,
|
||||
onChange: (page, pageSize) =>
|
||||
setDatasetPagination({
|
||||
current: page,
|
||||
pageSize: pageSize || datasetPagination.pageSize,
|
||||
total: datasetPagination.total,
|
||||
}),
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
<RightOutlined />
|
||||
@@ -231,21 +244,11 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
|
||||
})}
|
||||
rowSelection={{
|
||||
type: "checkbox",
|
||||
onSelectAll: (selected, _, changeRows) => {
|
||||
const newSelectedFiles = { ...selectedFilesMap };
|
||||
if (selected) {
|
||||
changeRows.forEach((row) => {
|
||||
newSelectedFiles[row.id] = row;
|
||||
});
|
||||
} else {
|
||||
changeRows.forEach((row) => {
|
||||
delete newSelectedFiles[row.id];
|
||||
});
|
||||
}
|
||||
onSelectedFilesChange(newSelectedFiles);
|
||||
},
|
||||
selectedRowKeys: Object.keys(selectedFilesMap),
|
||||
onSelect: toggleSelectFile,
|
||||
getCheckboxProps: (record: DatasetFile) => ({
|
||||
name: record.fileName,
|
||||
}),
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
186
frontend/src/pages/SynthesisTask/SynthFileTask.tsx
Normal file
186
frontend/src/pages/SynthesisTask/SynthFileTask.tsx
Normal file
@@ -0,0 +1,186 @@
|
||||
import { useEffect, useState } from "react";
|
||||
import { useParams, useNavigate } from "react-router";
|
||||
import { Table, Badge, Button } from "antd";
|
||||
import type { ColumnsType, TablePaginationConfig } from "antd/es/table";
|
||||
import { querySynthesisFileTasksUsingGet, querySynthesisTaskByIdUsingGet } from "@/pages/SynthesisTask/synthesis-api";
|
||||
import type { BadgeProps } from "antd";
|
||||
import { formatDateTime } from "@/utils/unit";
|
||||
|
||||
/**
 * One row of the file-task table: a single file processed under a synthesis
 * task. Field names are snake_case because the rows are used exactly as they
 * arrive in the paged API response.
 */
interface SynthesisFileTaskItem {
  /** Record identifier; used as the table `rowKey`. */
  id: string;
  synthesis_instance_id: string;
  /** Rendered in the file-name column. */
  file_name: string;
  source_file_id: string;
  /** Rendered (with ellipsis) in the target-file-path column. */
  target_file_location: string;
  /**
   * The status column recognises "pending", "processing", "completed" and
   * "failed"; any other value is displayed verbatim, and a missing value is
   * displayed as unknown.
   */
  status?: string;
  /** Denominator of the chunk-progress column. */
  total_chunks: number;
  /** Numerator of the chunk-progress column. */
  processed_chunks: number;
  /** Passed through `formatDateTime` when present; shown as "-" otherwise. */
  created_at?: string;
  /** Passed through `formatDateTime` when present; shown as "-" otherwise. */
  updated_at?: string;
}
|
||||
|
||||
/**
 * Generic page envelope read from the file-task list response.
 * `page`, `size` and `totalElements` are copied into the antd table
 * pagination state (`current`, `pageSize`, `total`).
 */
interface PagedResponse<T> {
  /** Rows of the current page. */
  content: T[];
  /** Total number of rows across all pages. */
  totalElements: number;
  totalPages: number;
  /** Page number echoed by the server; fed to the table as `current`. */
  page: number;
  /** Page size echoed by the server; fed to the table as `pageSize`. */
  size: number;
}
|
||||
|
||||
/**
 * Subset of the parent synthesis task that the page header displays.
 */
interface SynthesisTaskInfo {
  id: string;
  /** Shown as the header title. */
  name: string;
  /** "QA" and "COT" get a localized label in the header; other values are shown verbatim. */
  synthesis_type: string;
  /** Shown in the header status tag. */
  status: string;
  /** Formatted with `formatDateTime` in the header. */
  created_at: string;
  /** Shown as-is in the header. */
  model_id: string;
}
|
||||
|
||||
/** Header-tag labels for the known synthesis types; unknown types are shown verbatim. */
const SYNTHESIS_TYPE_LABELS: Record<string, string> = {
  QA: "问答对生成",
  COT: "链式推理生成",
};

/** Header labels for task-level statuses; unknown statuses are shown verbatim. */
const TASK_STATUS_LABELS: Record<string, string> = {
  pending: "等待中",
  running: "运行中",
  completed: "已完成",
  failed: "失败",
  paused: "已暂停",
};

/** Narrow an unknown value to a plain object so its fields can be probed safely. */
function toRecord(value: unknown): Record<string, unknown> | null {
  return typeof value === "object" && value !== null
    ? (value as Record<string, unknown>)
    : null;
}

/** Runtime check that a value looks like a task-detail payload. */
function isTaskInfo(value: unknown): value is SynthesisTaskInfo {
  return typeof toRecord(value)?.id === "string";
}

/** Runtime check that a value looks like a page of file tasks. */
function isFileTaskPage(
  value: unknown
): value is PagedResponse<SynthesisFileTaskItem> {
  return Array.isArray(toRecord(value)?.content);
}

/**
 * Find the API payload inside a response, whichever wrapping the request
 * helper uses: `res.data.data` (transport response around the standard
 * envelope) or `res.data` (the standard envelope itself). The candidate is
 * accepted only when `isPayload` confirms its shape, so an envelope object is
 * never mistaken for the payload.
 */
function pickPayload<T>(
  res: unknown,
  isPayload: (value: unknown) => value is T
): T | null {
  const body = toRecord(res)?.data;
  const nested = toRecord(body)?.data;
  if (isPayload(nested)) return nested;
  if (isPayload(body)) return body;
  return null;
}

/**
 * Detail page of one synthesis task: a header with the task summary and a
 * paged table of the file tasks that belong to it. The task id comes from the
 * `:id` route parameter.
 */
export default function SynthFileTask() {
  const { id: taskId = "" } = useParams();
  const navigate = useNavigate();
  const [loading, setLoading] = useState(false);
  const [data, setData] = useState<SynthesisFileTaskItem[]>([]);
  const [pagination, setPagination] = useState<TablePaginationConfig>({
    current: 1,
    pageSize: 10,
    total: 0,
  });
  const [taskInfo, setTaskInfo] = useState<SynthesisTaskInfo | null>(null);

  // Load the parent task summary shown in the header.
  useEffect(() => {
    if (!taskId) return;
    // Guards against a late response overwriting state after taskId changed
    // or the component unmounted.
    let cancelled = false;
    querySynthesisTaskByIdUsingGet(taskId)
      .then((res: unknown) => {
        if (cancelled) return;
        setTaskInfo(pickPayload(res, isTaskInfo));
      })
      .catch((error: unknown) => {
        if (cancelled) return;
        console.error("Failed to load synthesis task detail", error);
        setTaskInfo(null);
      });
    return () => {
      cancelled = true;
    };
  }, [taskId]);

  /** Fetch one page of file tasks and sync the table pagination state. */
  const fetchData = async (page = 1, pageSize = 10) => {
    if (!taskId) return;
    setLoading(true);
    try {
      const res: unknown = await querySynthesisFileTasksUsingGet(taskId, {
        page,
        page_size: pageSize,
      });
      const payload = pickPayload(res, isFileTaskPage);
      setData(payload?.content ?? []);
      setPagination({
        current: payload?.page ?? page,
        pageSize: payload?.size ?? pageSize,
        total: payload?.totalElements ?? payload?.content.length ?? 0,
      });
    } catch (error: unknown) {
      // Callers fire-and-forget this function, so the failure must be
      // handled here; show an empty page rather than stale rows.
      console.error("Failed to load synthesis file tasks", error);
      setData([]);
      setPagination({ current: page, pageSize, total: 0 });
    } finally {
      setLoading(false);
    }
  };

  // Reload from the first page whenever the task changes.
  useEffect(() => {
    void fetchData(1, pagination.pageSize || 10);
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [taskId]);

  const handleTableChange = (pag: TablePaginationConfig) => {
    void fetchData(pag.current || 1, pag.pageSize || 10);
  };

  const columns: ColumnsType<SynthesisFileTaskItem> = [
    {
      title: "文件名",
      dataIndex: "file_name",
      key: "file_name",
    },
    {
      title: "状态",
      dataIndex: "status",
      key: "status",
      render: (status?: string) => {
        let badgeStatus: BadgeProps["status"] = "default";
        let text = status || "未知";
        if (status === "pending" || status === "processing") {
          badgeStatus = "processing";
          text = "处理中";
        } else if (status === "completed") {
          badgeStatus = "success";
          text = "已完成";
        } else if (status === "failed") {
          badgeStatus = "error";
          text = "失败";
        }
        return <Badge status={badgeStatus} text={text} />;
      },
    },
    {
      title: "切片进度",
      key: "chunks",
      render: (_text, record) => (
        <span>
          {record.processed_chunks}/{record.total_chunks}
        </span>
      ),
    },
    {
      title: "目标文件路径",
      dataIndex: "target_file_location",
      key: "target_file_location",
      ellipsis: true,
    },
    {
      title: "创建时间",
      dataIndex: "created_at",
      key: "created_at",
      render: (val?: string) => (val ? formatDateTime(val) : "-"),
    },
    {
      title: "更新时间",
      dataIndex: "updated_at",
      key: "updated_at",
      render: (val?: string) => (val ? formatDateTime(val) : "-"),
    },
  ];

  return (
    <div className="p-4 bg-white rounded-lg h-full flex flex-col">
      {/* Task summary and back button */}
      <div className="flex items-center justify-between mb-4">
        <div className="space-y-1">
          {taskInfo && (
            <>
              <div className="text-lg font-medium flex items-center gap-2">
                <span>{taskInfo.name}</span>
                <span className="text-xs px-2 py-0.5 rounded bg-blue-50 text-blue-700 border border-blue-200">
                  {SYNTHESIS_TYPE_LABELS[taskInfo.synthesis_type] ?? taskInfo.synthesis_type}
                </span>
                <span className="text-xs px-2 py-0.5 rounded bg-gray-50 text-gray-700 border border-gray-200">
                  状态:{TASK_STATUS_LABELS[taskInfo.status] ?? taskInfo.status}
                </span>
              </div>
              <div className="text-xs text-gray-500 flex gap-4">
                <span>创建时间:{formatDateTime(taskInfo.created_at)}</span>
                <span>模型ID:{taskInfo.model_id}</span>
              </div>
            </>
          )}
        </div>
        <Button type="default" onClick={() => navigate("/data/synthesis/task")}>返回任务首页</Button>
      </div>
      {/* File-task table */}
      <Table<SynthesisFileTaskItem>
        rowKey="id"
        loading={loading}
        dataSource={data}
        columns={columns}
        pagination={pagination}
        onChange={handleTableChange}
      />
    </div>
  );
}
|
||||
@@ -1,112 +1,116 @@
|
||||
import { useState } from "react";
|
||||
import { Card, Button, Badge, Table, Progress } from "antd";
|
||||
import { useState, useEffect, ElementType } from "react";
|
||||
import { Card, Button, Badge, Table, Modal, message, Tooltip } from "antd";
|
||||
import {
|
||||
Plus,
|
||||
Sparkles,
|
||||
ArrowUp,
|
||||
ArrowDown,
|
||||
Pause,
|
||||
Play,
|
||||
DownloadIcon,
|
||||
CheckCircle,
|
||||
Check,
|
||||
StopCircle,
|
||||
Sparkles,
|
||||
} from "lucide-react";
|
||||
import type { SynthesisTask } from "@/pages/SynthesisTask/synthesis";
|
||||
import { mockSynthesisTasks } from "@/mock/synthesis";
|
||||
import { DeleteOutlined, EyeOutlined } from "@ant-design/icons";
|
||||
import { Link, useNavigate } from "react-router";
|
||||
import { SearchControls } from "@/components/SearchControls";
|
||||
import { formatDateTime } from "@/utils/unit";
|
||||
import {
|
||||
querySynthesisTasksUsingGet,
|
||||
deleteSynthesisTaskByIdUsingDelete,
|
||||
} from "@/pages/SynthesisTask/synthesis-api";
|
||||
|
||||
/**
 * A synthesis task as listed by the task table. Field names are snake_case
 * because list rows are stored exactly as returned by the task query.
 */
interface SynthesisTask {
  /** Row key of the table and id used in detail links and delete calls. */
  id: string;
  name: string;
  description?: string;
  /** Looked up in the status badge config; unknown values fall back to the "pending" badge. */
  status: string;
  /** "QA" and "COT" are mapped to localized labels; other values are shown verbatim. */
  synthesis_type: string;
  model_id: string;
  progress?: number;
  result_data_location?: string;
  text_split_config?: {
    chunk_size: number;
    chunk_overlap: number;
  };
  synthesis_config?: {
    temperature?: number | null;
    prompt_template?: string;
    synthesis_count?: number | null;
  };
  /** Its length is the fallback for the file-count column when `total_files` is absent. */
  source_file_id?: string[];
  /** Shown in the file-count column. */
  total_files?: number;
  processed_files?: number;
  total_chunks?: number;
  processed_chunks?: number;
  total_synthesis_data?: number;
  /** Formatted with `formatDateTime` in the created-at column. */
  created_at: string;
  updated_at?: string;
  created_by?: string;
  updated_by?: string;
}
|
||||
|
||||
export default function SynthesisTaskTab() {
|
||||
const navigate = useNavigate();
|
||||
const [searchQuery, setSearchQuery] = useState("");
|
||||
const [tasks, setTasks] = useState<SynthesisTask[]>(mockSynthesisTasks);
|
||||
const [tasks, setTasks] = useState<SynthesisTask[]>([]);
|
||||
const [filterStatus, setFilterStatus] = useState("all");
|
||||
const [sortBy, setSortBy] = useState<"createdAt" | "name">("createdAt");
|
||||
const [sortOrder, setSortOrder] = useState<"asc" | "desc">("desc");
|
||||
const [page, setPage] = useState(1);
|
||||
const [pageSize, setPageSize] = useState(10);
|
||||
const [total, setTotal] = useState(0);
|
||||
const [loading, setLoading] = useState(false);
|
||||
|
||||
// 过滤任务
|
||||
const filteredTasks = tasks.filter((task) => {
|
||||
const matchesSearch =
|
||||
task.name.toLowerCase().includes(searchQuery.toLowerCase()) ||
|
||||
task.template.toLowerCase().includes(searchQuery.toLowerCase());
|
||||
const matchesStatus =
|
||||
filterStatus === "all" || task.status === filterStatus;
|
||||
return matchesSearch && matchesStatus;
|
||||
});
|
||||
|
||||
// 排序任务
|
||||
const sortedTasks = [...filteredTasks].sort((a, b) => {
|
||||
if (sortBy === "createdAt") {
|
||||
const dateA = new Date(a.createdAt).getTime();
|
||||
const dateB = new Date(b.createdAt).getTime();
|
||||
return sortOrder === "asc" ? dateA - dateB : dateB - dateA;
|
||||
} else if (sortBy === "name") {
|
||||
return sortOrder === "asc"
|
||||
? a.name.localeCompare(b.name)
|
||||
: b.name.localeCompare(a.name);
|
||||
// 获取任务列表
|
||||
const loadTasks = async () => {
|
||||
setLoading(true);
|
||||
try {
|
||||
const params = {
|
||||
page: page,
|
||||
page_size: pageSize,
|
||||
} as {
|
||||
page?: number;
|
||||
page_size?: number;
|
||||
synthesis_type?: string;
|
||||
status?: string;
|
||||
name?: string;
|
||||
};
|
||||
if (searchQuery) params.name = searchQuery;
|
||||
if (filterStatus !== "all") params.synthesis_type = filterStatus;
|
||||
const res = await querySynthesisTasksUsingGet(params);
|
||||
setTasks(res?.data?.content || []);
|
||||
setTotal(res?.data?.totalElements || 0);
|
||||
} catch {
|
||||
setTasks([]);
|
||||
setTotal(0);
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
return 0;
|
||||
});
|
||||
const handleTaskAction = (taskId: number, action: string) => {
|
||||
setTasks((prev) =>
|
||||
prev.map((task) => {
|
||||
if (task.id === taskId) {
|
||||
switch (action) {
|
||||
case "pause":
|
||||
return { ...task, status: "paused" as const };
|
||||
case "resume":
|
||||
return { ...task, status: "running" as const };
|
||||
case "stop":
|
||||
return {
|
||||
...task,
|
||||
status: "failed" as const,
|
||||
progress: task.progress,
|
||||
};
|
||||
default:
|
||||
return task;
|
||||
}
|
||||
}
|
||||
return task;
|
||||
})
|
||||
);
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
loadTasks();
|
||||
// eslint-disable-next-line
|
||||
}, [searchQuery, filterStatus, page, pageSize]);
|
||||
|
||||
// 状态徽章
|
||||
const getStatusBadge = (status: string) => {
|
||||
const statusConfig = {
|
||||
pending: {
|
||||
label: "等待中",
|
||||
color: "#F59E0B",
|
||||
icon: Pause,
|
||||
},
|
||||
running: {
|
||||
label: "运行中",
|
||||
color: "#3B82F6",
|
||||
icon: Play,
|
||||
},
|
||||
completed: {
|
||||
label: "已完成",
|
||||
color: "#10B981",
|
||||
icon: CheckCircle,
|
||||
},
|
||||
failed: {
|
||||
label: "失败",
|
||||
color: "#EF4444",
|
||||
icon: Pause,
|
||||
},
|
||||
paused: {
|
||||
label: "已暂停",
|
||||
color: "#E5E7EB",
|
||||
icon: Pause,
|
||||
},
|
||||
const statusConfig: Record<string, { label: string; color: string; icon: ElementType }> = {
|
||||
pending: { label: "等待中", color: "#F59E0B", icon: Pause },
|
||||
running: { label: "运行中", color: "#3B82F6", icon: Play },
|
||||
completed: { label: "已完成", color: "#10B981", icon: CheckCircle },
|
||||
failed: { label: "失败", color: "#EF4444", icon: Pause },
|
||||
paused: { label: "已暂停", color: "#E5E7EB", icon: Pause },
|
||||
};
|
||||
return (
|
||||
statusConfig[status as keyof typeof statusConfig] || statusConfig.pending
|
||||
);
|
||||
return statusConfig[status] ?? statusConfig["pending"];
|
||||
};
|
||||
|
||||
// 任务表格列
|
||||
// 类型映射
|
||||
const typeMap: Record<string, string> = {
|
||||
QA: "问答对生成",
|
||||
COT: "链式推理生成",
|
||||
};
|
||||
|
||||
// 表格列
|
||||
const taskColumns = [
|
||||
{
|
||||
title: (
|
||||
@@ -134,98 +138,77 @@ export default function SynthesisTaskTab() {
|
||||
dataIndex: "name",
|
||||
key: "name",
|
||||
fixed: "left" as const,
|
||||
render: (text: string, task: SynthesisTask) => (
|
||||
render: (_: unknown, task: SynthesisTask) => (
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="w-8 h-8 bg-blue-500 rounded-lg flex items-center justify-center shadow-sm">
|
||||
{/* 可根据 type 渲染不同图标 */}
|
||||
<span className="text-white font-bold text-base">
|
||||
{task.type?.toUpperCase()?.slice(0, 1) || "T"}
|
||||
{task.synthesis_type?.toUpperCase()?.slice(0, 1) || "T"}
|
||||
</span>
|
||||
</div>
|
||||
<div>
|
||||
<Link to={`/data/synthesis/task/${task.id}`}>{task.name}</Link>
|
||||
<div className="text-xs text-gray-500">{task.template}</div>
|
||||
</div>
|
||||
</div>
|
||||
),
|
||||
},
|
||||
{
|
||||
title: "类型",
|
||||
dataIndex: "type",
|
||||
key: "type",
|
||||
render: (type: string) => type.toUpperCase(),
|
||||
dataIndex: "synthesis_type",
|
||||
key: "synthesis_type",
|
||||
render: (type: string) => typeMap[type] || type,
|
||||
},
|
||||
{
|
||||
title: "状态",
|
||||
dataIndex: "status",
|
||||
key: "status",
|
||||
render: (status: string) => {
|
||||
const statusConfig = getStatusBadge(status);
|
||||
return <Badge color={statusConfig.color} text={statusConfig.label} />;
|
||||
},
|
||||
},
|
||||
{
|
||||
title: "进度",
|
||||
dataIndex: "progress",
|
||||
key: "progress",
|
||||
width: 150,
|
||||
render: (_: any, task: SynthesisTask) => (
|
||||
<Progress percent={task.progress} size="small" />
|
||||
),
|
||||
},
|
||||
{
|
||||
title: "源数据集",
|
||||
dataIndex: "sourceDataset",
|
||||
key: "sourceDataset",
|
||||
render: (text: string) => (
|
||||
<div className="text-sm text-gray-900">{text}</div>
|
||||
),
|
||||
},
|
||||
{
|
||||
title: "生成数量",
|
||||
dataIndex: "generatedCount",
|
||||
key: "generatedCount",
|
||||
render: (_: any, task: SynthesisTask) => (
|
||||
<div className="text-sm font-medium text-gray-900">
|
||||
{task.generatedCount?.toLocaleString?.()} /{" "}
|
||||
{task.targetCount?.toLocaleString?.()}
|
||||
</div>
|
||||
),
|
||||
},
|
||||
{
|
||||
title: "质量评分",
|
||||
dataIndex: "quality",
|
||||
key: "quality",
|
||||
render: (quality: number) => (quality ? `${quality}%` : "-"),
|
||||
title: "文件数",
|
||||
dataIndex: "total_files",
|
||||
key: "total_files",
|
||||
render: (num: number, task: SynthesisTask) => <span>{num ?? (task.source_file_id?.length ?? 0)}</span>,
|
||||
},
|
||||
{
|
||||
title: "创建时间",
|
||||
dataIndex: "createdAt",
|
||||
key: "createdAt",
|
||||
render: formatDateTime,
|
||||
dataIndex: "created_at",
|
||||
key: "created_at",
|
||||
render: (val: string) => formatDateTime(val),
|
||||
},
|
||||
{
|
||||
title: "操作",
|
||||
key: "actions",
|
||||
fixed: "right" as const,
|
||||
render: (_: any, task: SynthesisTask) => (
|
||||
render: (_: unknown, task: SynthesisTask) => (
|
||||
<div className="flex items-center justify-center gap-1">
|
||||
{task.status === "running" && (
|
||||
<Tooltip title="查看详情">
|
||||
<Button
|
||||
onClick={() => handleTaskAction(task.id, "pause")}
|
||||
className="hover:bg-orange-50 p-1 h-7 w-7"
|
||||
onClick={() => navigate(`/data/synthesis/task/${task.id}`)}
|
||||
className="hover:bg-blue-50 p-1 h-7 w-7"
|
||||
type="text"
|
||||
icon={<Pause className="w-4 h-4" />}
|
||||
></Button>
|
||||
)}
|
||||
{task.status === "paused" && (
|
||||
icon={<EyeOutlined />}
|
||||
/>
|
||||
</Tooltip>
|
||||
<Tooltip title="删除任务">
|
||||
<Button
|
||||
onClick={() => handleTaskAction(task.id, "resume")}
|
||||
className="hover:bg-green-50 p-1 h-7 w-7"
|
||||
danger
|
||||
type="text"
|
||||
icon={<Play className="w-4 h-4" />}
|
||||
></Button>
|
||||
)}
|
||||
className="hover:bg-red-50 p-1 h-7 w-7"
|
||||
icon={<DeleteOutlined />}
|
||||
onClick={() => {
|
||||
Modal.confirm({
|
||||
title: `确认删除任务?`,
|
||||
content: `任务名:${task.name}`,
|
||||
okText: "删除",
|
||||
okType: "danger",
|
||||
cancelText: "取消",
|
||||
onOk: async () => {
|
||||
try {
|
||||
await deleteSynthesisTaskByIdUsingDelete(task.id);
|
||||
message.success("删除成功");
|
||||
loadTasks();
|
||||
} catch {
|
||||
message.error("删除失败");
|
||||
}
|
||||
},
|
||||
});
|
||||
}}
|
||||
/>
|
||||
</Tooltip>
|
||||
</div>
|
||||
),
|
||||
},
|
||||
@@ -237,18 +220,15 @@ export default function SynthesisTaskTab() {
|
||||
<SearchControls
|
||||
searchTerm={searchQuery}
|
||||
onSearchChange={setSearchQuery}
|
||||
searchPlaceholder="搜索任务名称或模板..."
|
||||
searchPlaceholder="搜索任务名称..."
|
||||
filters={[
|
||||
{
|
||||
key: "status",
|
||||
label: "状态",
|
||||
label: "类型",
|
||||
options: [
|
||||
{ label: "全部状态", value: "all" },
|
||||
{ label: "等待中", value: "pending" },
|
||||
{ label: "运行中", value: "running" },
|
||||
{ label: "已完成", value: "completed" },
|
||||
{ label: "失败", value: "failed" },
|
||||
{ label: "已暂停", value: "paused" },
|
||||
{ label: "全部类型", value: "all" },
|
||||
{ label: "问答对生成", value: "QA" },
|
||||
{ label: "链式推理生成", value: "COT" },
|
||||
],
|
||||
},
|
||||
]}
|
||||
@@ -259,13 +239,23 @@ export default function SynthesisTaskTab() {
|
||||
showFilters
|
||||
showViewToggle={false}
|
||||
/>
|
||||
|
||||
{/* 任务表格 */}
|
||||
<Card>
|
||||
<Table
|
||||
columns={taskColumns}
|
||||
dataSource={sortedTasks}
|
||||
dataSource={tasks}
|
||||
rowKey="id"
|
||||
loading={loading}
|
||||
pagination={{
|
||||
current: page,
|
||||
pageSize: pageSize,
|
||||
total: total,
|
||||
onChange: (p, ps) => {
|
||||
setPage(p);
|
||||
setPageSize(ps);
|
||||
},
|
||||
showSizeChanger: true,
|
||||
}}
|
||||
scroll={{ x: "max-content" }}
|
||||
locale={{
|
||||
emptyText: (
|
||||
|
||||
37
frontend/src/pages/SynthesisTask/synthesis-api.ts
Normal file
37
frontend/src/pages/SynthesisTask/synthesis-api.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
import { get, post, del } from "@/utils/request";
|
||||
|
||||
/**
 * Create a data synthesis task.
 *
 * @param data Request body, forwarded unchanged to the POST helper.
 * @returns Whatever the `post` request helper returns for this call.
 */
export function createSynthesisTaskUsingPost(data: unknown) {
  const endpoint = "/api/synthesis/gen/task";
  return post(endpoint, data);
}
|
||||
|
||||
/**
 * Fetch the detail of one data synthesis task.
 *
 * @param taskId Task identifier. It is percent-encoded before being placed in
 *   the URL path so that characters such as `/`, `?` or `#` in a route-derived
 *   id cannot change which endpoint is requested.
 * @returns Whatever the `get` request helper returns for this call.
 */
export function querySynthesisTaskByIdUsingGet(taskId: string) {
  return get(`/api/synthesis/gen/task/${encodeURIComponent(taskId)}`);
}
|
||||
|
||||
/** Query-string filters accepted by the synthesis task list endpoint. */
type SynthesisTaskListQuery = {
  page?: number;
  page_size?: number;
  synthesis_type?: string;
  status?: string;
  name?: string;
};

/**
 * Query a page of data synthesis tasks.
 *
 * @param params Paging and filter options, sent as query parameters.
 * @returns Whatever the `get` request helper returns for this call.
 */
export function querySynthesisTasksUsingGet(params: SynthesisTaskListQuery) {
  const endpoint = `/api/synthesis/gen/tasks`;
  return get(endpoint, params as any);
}
|
||||
|
||||
/**
 * Delete an entire data synthesis task.
 *
 * @param taskId Task identifier. It is percent-encoded before being placed in
 *   the URL path so that a malformed id cannot redirect this destructive call
 *   to a different endpoint.
 * @returns Whatever the `del` request helper returns for this call.
 */
export function deleteSynthesisTaskByIdUsingDelete(taskId: string) {
  return del(`/api/synthesis/gen/task/${encodeURIComponent(taskId)}`);
}
|
||||
|
||||
/**
 * Query a page of the file tasks that belong to one synthesis task.
 *
 * @param taskId Parent task identifier; percent-encoded before being placed
 *   in the URL path so special characters cannot alter the request path.
 * @param params Paging options, sent as query parameters.
 * @returns Whatever the `get` request helper returns for this call.
 */
export function querySynthesisFileTasksUsingGet(taskId: string, params: { page?: number; page_size?: number }) {
  return get(`/api/synthesis/gen/task/${encodeURIComponent(taskId)}/files`, params as any);
}
|
||||
|
||||
/**
 * Fetch the prompt associated with a synthesis type.
 *
 * @param synthType Synthesis type, sent as the `synth_type` query parameter.
 * @returns Whatever the `get` request helper returns for this call.
 */
export function getPromptByTypeUsingGet(synthType: string) {
  const query = { synth_type: synthType };
  return get(`/api/synthesis/gen/prompt`, query as any);
}
|
||||
@@ -40,6 +40,7 @@ import { withErrorBoundary } from "@/components/ErrorBoundary";
|
||||
import AgentPage from "@/pages/Agent/Agent.tsx";
|
||||
import RatioTaskDetail from "@/pages/RatioTask/Detail/RatioTaskDetail";
|
||||
import CleansingTemplateDetail from "@/pages/DataCleansing/Detail/TemplateDetail";
|
||||
import SynthFileTask from "@/pages/SynthesisTask/SynthFileTask.tsx";
|
||||
import EvaluationDetailPage from "@/pages/DataEvaluation/Detail/TaskDetail.tsx";
|
||||
|
||||
const router = createBrowserRouter([
|
||||
@@ -160,6 +161,7 @@ const router = createBrowserRouter([
|
||||
path: "create",
|
||||
Component: SynthesisTaskCreate,
|
||||
},
|
||||
{path: ":id", Component: SynthFileTask},
|
||||
],
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
from fastapi import APIRouter
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/synth",
|
||||
tags = ["synth"]
|
||||
prefix="/synthesis",
|
||||
tags = ["synthesis"]
|
||||
)
|
||||
|
||||
# Include sub-routers
|
||||
|
||||
@@ -18,7 +18,14 @@ from app.db.session import get_db
|
||||
from app.module.generation.schema.generation import (
|
||||
CreateSynthesisTaskRequest,
|
||||
DataSynthesisTaskItem,
|
||||
PagedDataSynthesisTaskResponse, SynthesisType)
|
||||
PagedDataSynthesisTaskResponse,
|
||||
SynthesisType,
|
||||
DataSynthesisFileTaskItem,
|
||||
PagedDataSynthesisFileTaskResponse,
|
||||
DataSynthesisChunkItem,
|
||||
PagedDataSynthesisChunkResponse,
|
||||
SynthesisDataItem,
|
||||
)
|
||||
from app.module.generation.service.generation_service import GenerationService
|
||||
from app.module.generation.service.prompt import get_prompt
|
||||
from app.module.shared.schema import StandardResponse
|
||||
@@ -219,19 +226,26 @@ async def delete_synthesis_task(
|
||||
data=None,
|
||||
)
|
||||
|
||||
|
||||
@router.delete("/task/{task_id}/{file_id}", response_model=StandardResponse[None])
|
||||
async def delete_synthesis_file_task(
|
||||
task_id: str,
|
||||
file_id: str,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""删除数据合成任务中的文件任务"""
|
||||
"""删除数据合成任务中的文件任务,同时刷新任务表中的文件/切片数量"""
|
||||
# 先获取任务和文件任务记录
|
||||
task = await db.get(DataSynthesisInstance, task_id)
|
||||
if not task:
|
||||
raise HTTPException(status_code=404, detail="Synthesis task not found")
|
||||
|
||||
file_task = await db.get(DataSynthesisFileInstance, file_id)
|
||||
if not file_task:
|
||||
raise HTTPException(status_code=404, detail="Synthesis file task not found")
|
||||
|
||||
# 删除 SynthesisData(根据文件任务ID)
|
||||
await db.execute(delete(SynthesisData).where(
|
||||
await db.execute(
|
||||
delete(SynthesisData).where(
|
||||
SynthesisData.synthesis_file_instance_id == file_id
|
||||
)
|
||||
)
|
||||
@@ -243,11 +257,28 @@ async def delete_synthesis_file_task(
|
||||
)
|
||||
|
||||
# 删除文件任务记录
|
||||
await db.execute(delete(DataSynthesisFileInstance).where(
|
||||
await db.execute(
|
||||
delete(DataSynthesisFileInstance).where(
|
||||
DataSynthesisFileInstance.id == file_id
|
||||
)
|
||||
)
|
||||
|
||||
# 刷新任务级别统计字段:总文件数、总文本块数、已处理文本块数
|
||||
if task.total_files and task.total_files > 0:
|
||||
task.total_files -= 1
|
||||
if task.total_files < 0:
|
||||
task.total_files = 0
|
||||
|
||||
await db.commit()
|
||||
await db.refresh(task)
|
||||
|
||||
return StandardResponse(
|
||||
code=200,
|
||||
message="success",
|
||||
data=None,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/prompt", response_model=StandardResponse[str])
|
||||
async def get_prompt_by_type(
|
||||
synth_type: SynthesisType,
|
||||
@@ -258,3 +289,157 @@ async def get_prompt_by_type(
|
||||
message="Success",
|
||||
data=prompt,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/task/{task_id}/files", response_model=StandardResponse[PagedDataSynthesisFileTaskResponse])
async def list_synthesis_file_tasks(
    task_id: str,
    page: int = 1,
    page_size: int = 10,
    db: AsyncSession = Depends(get_db),
):
    """Return one page of the file tasks that belong to a data synthesis task.

    Args:
        task_id: Identifier of the parent synthesis task.
        page: 1-based page number; values below 1 are treated as 1.
        page_size: Rows per page; values below 1 are treated as 10.
        db: Async database session.

    Returns:
        A StandardResponse wrapping the page content and paging totals.

    Raises:
        HTTPException: 404 when the synthesis task does not exist.
    """
    # Reject unknown tasks before querying their files.
    task = await db.get(DataSynthesisInstance, task_id)
    if not task:
        raise HTTPException(status_code=404, detail="Synthesis task not found")

    base_query = select(DataSynthesisFileInstance).where(
        DataSynthesisFileInstance.synthesis_instance_id == task_id
    )

    count_q = select(func.count()).select_from(base_query.subquery())
    total = (await db.execute(count_q)).scalar_one()

    if page < 1:
        page = 1
    if page_size < 1:
        page_size = 10

    # OFFSET/LIMIT without ORDER BY lets the database return rows in any
    # order, so rows could repeat or go missing between pages. Ordering by the
    # primary key makes paging deterministic.
    result = await db.execute(
        base_query.order_by(DataSynthesisFileInstance.id.asc())
        .offset((page - 1) * page_size)
        .limit(page_size)
    )
    rows = result.scalars().all()

    file_items = [
        DataSynthesisFileTaskItem(
            id=row.id,
            synthesis_instance_id=row.synthesis_instance_id,
            file_name=row.file_name,
            source_file_id=row.source_file_id,
            target_file_location=row.target_file_location,
            status=row.status,
            total_chunks=row.total_chunks,
            processed_chunks=row.processed_chunks,
            created_at=row.created_at,
            updated_at=row.updated_at,
            created_by=row.created_by,
            updated_by=row.updated_by,
        )
        for row in rows
    ]

    paged = PagedDataSynthesisFileTaskResponse(
        content=file_items,
        totalElements=total,
        # Ceiling division: a partially filled last page still counts.
        totalPages=(total + page_size - 1) // page_size,
        page=page,
        size=page_size,
    )

    return StandardResponse(
        code=200,
        message="Success",
        data=paged,
    )
|
||||
|
||||
|
||||
@router.get("/file/{file_id}/chunks", response_model=StandardResponse[PagedDataSynthesisChunkResponse])
async def list_chunks_by_file(
    file_id: str,
    page: int = 1,
    page_size: int = 10,
    db: AsyncSession = Depends(get_db),
):
    """Return one page of chunk records for a file task, ordered by chunk index.

    Args:
        file_id: Identifier of the file task whose chunks are listed.
        page: 1-based page number; values below 1 are treated as 1.
        page_size: Rows per page; values below 1 are treated as 10.
        db: Async database session.

    Raises:
        HTTPException: 404 when the file task does not exist.
    """
    # The file task must exist before its chunks are listed.
    owner = await db.get(DataSynthesisFileInstance, file_id)
    if not owner:
        raise HTTPException(status_code=404, detail="Synthesis file task not found")

    chunks_of_file = select(DataSynthesisChunkInstance).where(
        DataSynthesisChunkInstance.synthesis_file_instance_id == file_id
    )

    total_stmt = select(func.count()).select_from(chunks_of_file.subquery())
    total_result = await db.execute(total_stmt)
    total = total_result.scalar_one()

    # Normalise out-of-range paging arguments to their defaults.
    page = max(page, 1)
    page_size = page_size if page_size >= 1 else 10

    skip = (page - 1) * page_size
    page_stmt = (
        chunks_of_file
        .order_by(DataSynthesisChunkInstance.chunk_index.asc())
        .offset(skip)
        .limit(page_size)
    )
    records = (await db.execute(page_stmt)).scalars().all()

    chunk_items = []
    for record in records:
        chunk_items.append(
            DataSynthesisChunkItem(
                id=record.id,
                synthesis_file_instance_id=record.synthesis_file_instance_id,
                chunk_index=record.chunk_index,
                chunk_content=record.chunk_content,
                # Tolerate ORM rows that do not expose chunk_metadata.
                chunk_metadata=getattr(record, "chunk_metadata", None),
            )
        )

    # Ceiling division: a partially filled last page still counts.
    page_count = (total + page_size - 1) // page_size

    return StandardResponse(
        code=200,
        message="Success",
        data=PagedDataSynthesisChunkResponse(
            content=chunk_items,
            totalElements=total,
            totalPages=page_count,
            page=page,
            size=page_size,
        ),
    )
|
||||
|
||||
|
||||
@router.get("/chunk/{chunk_id}/data", response_model=StandardResponse[list[SynthesisDataItem]])
async def list_synthesis_data_by_chunk(
    chunk_id: str,
    db: AsyncSession = Depends(get_db),
):
    """Return every synthesis result record produced for one chunk.

    Args:
        chunk_id: Primary key of the ``DataSynthesisChunkInstance`` to look up.
        db: Async database session injected by FastAPI.

    Returns:
        A ``StandardResponse`` (code 200) whose ``data`` is a list of ``SynthesisDataItem``.

    Raises:
        HTTPException: 404 when no chunk with ``chunk_id`` exists.
    """
    # Reject unknown chunk ids up front.
    existing_chunk = await db.get(DataSynthesisChunkInstance, chunk_id)
    if not existing_chunk:
        raise HTTPException(status_code=404, detail="Chunk not found")

    stmt = select(SynthesisData).where(SynthesisData.chunk_instance_id == chunk_id)
    query_result = await db.execute(stmt)
    records = query_result.scalars().all()

    # Map each ORM record onto its response schema.
    payload = []
    for record in records:
        payload.append(
            SynthesisDataItem(
                id=record.id,
                data=record.data,
                synthesis_file_instance_id=record.synthesis_file_instance_id,
                chunk_instance_id=record.chunk_instance_id,
            )
        )

    return StandardResponse(code=200, message="Success", data=payload)
|
||||
|
||||
@@ -70,6 +70,67 @@ class PagedDataSynthesisTaskResponse(BaseModel):
|
||||
page: int
|
||||
size: int
|
||||
|
||||
|
||||
class DataSynthesisFileTaskItem(BaseModel):
    """A single file task that belongs to a data synthesis task."""

    # Identity and ownership
    id: str
    synthesis_instance_id: str

    # Source and target file information
    file_name: str
    source_file_id: str
    target_file_location: str

    # Progress tracking
    status: Optional[str] = None
    total_chunks: int
    processed_chunks: int

    # Audit fields
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    created_by: Optional[str] = None
    updated_by: Optional[str] = None

    class Config:
        orm_mode = True
|
||||
|
||||
|
||||
class PagedDataSynthesisFileTaskResponse(BaseModel):
    """Paged response carrying the file tasks of a data synthesis task."""

    # Items on the current page
    content: List[DataSynthesisFileTaskItem]
    # Paging totals
    totalElements: int
    totalPages: int
    # Paging parameters echoed back to the caller
    page: int
    size: int
|
||||
|
||||
|
||||
class DataSynthesisChunkItem(BaseModel):
    """A chunk record that belongs to a data synthesis file task."""

    # Identity and owning file task
    id: str
    synthesis_file_instance_id: str

    # Chunk payload; every field is optional and defaults to None
    chunk_index: Optional[int] = None
    chunk_content: Optional[str] = None
    chunk_metadata: Optional[Dict[str, Any]] = None

    class Config:
        orm_mode = True
|
||||
|
||||
|
||||
class PagedDataSynthesisChunkResponse(BaseModel):
    """Paged response carrying a list of chunk records."""

    # Items on the current page
    content: List[DataSynthesisChunkItem]
    # Paging totals
    totalElements: int
    totalPages: int
    # Paging parameters echoed back to the caller
    page: int
    size: int
|
||||
|
||||
|
||||
class SynthesisDataItem(BaseModel):
    """A single synthesis result record."""

    id: str
    # Result payload; optional and defaults to None
    data: Optional[Dict[str, Any]] = None

    # References to the owning file task and chunk
    synthesis_file_instance_id: str
    chunk_instance_id: str

    class Config:
        orm_mode = True
|
||||
|
||||
|
||||
class ChatRequest(BaseModel):
|
||||
"""聊天请求参数"""
|
||||
model_id: str
|
||||
|
||||
@@ -168,11 +168,11 @@ class GenerationService:
|
||||
self.db.add(chunk_record)
|
||||
|
||||
# 更新文件任务的分块数量
|
||||
file_task.chunk_count = len(chunks)
|
||||
file_task.total_chunks = len(chunks)
|
||||
file_task.status = "processing"
|
||||
|
||||
await self.db.refresh(file_task)
|
||||
await self.db.commit()
|
||||
await self.db.refresh(file_task)
|
||||
|
||||
async def _invoke_llm_for_chunks(
|
||||
self,
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from app.module.generation.schema.generation import SynthesisType
|
||||
|
||||
QA_PROMPT="""
|
||||
# 角色
|
||||
QA_PROMPT="""# 角色
|
||||
你是一位专业的AI助手,擅长从给定的文本中提取关键信息并创建用于教学和测试的问答对。
|
||||
|
||||
# 任务
|
||||
@@ -11,7 +10,7 @@ QA_PROMPT="""
|
||||
{document}
|
||||
|
||||
# 要求与指令
|
||||
1. **问题类型**:生成{synthesis_count - 1}-{synthesis_count + 1}个问答对。问题类型应多样化,包括但不限于:
|
||||
1. **问题类型**:生成 {synthesis_count} 个左右的问答对。问题类型应多样化,包括但不限于:
|
||||
* **事实性**:基于文本中明确提到的事实。
|
||||
* **理解性**:需要理解上下文和概念。
|
||||
* **归纳性**:需要总结或归纳多个信息点。
|
||||
@@ -30,8 +29,7 @@ QA_PROMPT="""
|
||||
"""
|
||||
|
||||
|
||||
COT_PROMPT="""
|
||||
# 角色
|
||||
COT_PROMPT="""# 角色
|
||||
你是一位专业的数据合成专家,擅长基于给定的原始文档和 COT(Chain of Thought,思维链)逻辑,生成高质量、符合实际应用场景的 COT 数据。COT 数据需包含清晰的问题、逐步推理过程和最终结论,能完整还原解决问题的思考路径。
|
||||
|
||||
# 任务
|
||||
@@ -41,7 +39,7 @@ COT_PROMPT="""
|
||||
{document}
|
||||
|
||||
# 要求与指令
|
||||
1. **数量要求**:生成 {min\_count}-{max\_count} 条 COT 数据(min\_count={synthesis\_count-1},max\_count={synthesis\_count+1})。
|
||||
1. **数量要求**:生成 {synthesis_count} 条左右的 COT 数据。
|
||||
2. **内容要求**:
|
||||
* 每条 COT 数据需包含 “问题”“思维链推理”“最终结论” 三部分,逻辑闭环,推理步骤清晰、连贯,不跳跃关键环节。
|
||||
* 问题需基于文档中的事实信息、概念关联或逻辑疑问,是读完文档后自然产生的有价值问题(避免无意义或过于简单的问题)。
|
||||
|
||||
Reference in New Issue
Block a user