feat: enhance backend deployment, frontend file selection and synthesis task management (#129)

* feat: Implement data synthesis task management with database models and API endpoints

* feat: Update Python version requirements and refine dependency constraints in configuration

* fix: Correctly extract file values from selectedFilesMap in AddDataDialog

* feat: Refactor synthesis task routes and enhance file task management in the API

* feat: Enhance SynthesisTaskTab with tooltip actions and add chunk data retrieval in API
This commit is contained in:
Dallas98
2025-12-04 09:57:13 +08:00
committed by GitHub
parent 1d19cd3a62
commit 7012a9ad98
14 changed files with 975 additions and 1193 deletions

View File

@@ -5,12 +5,14 @@ on:
branches: [ "main" ] branches: [ "main" ]
paths: paths:
- 'scripts/images/datamate-python/**' - 'scripts/images/datamate-python/**'
- 'runtime/datamate-python/**'
- '.github/workflows/docker-image-backend-python.yml' - '.github/workflows/docker-image-backend-python.yml'
- '.github/workflows/docker-images-reusable.yml' - '.github/workflows/docker-images-reusable.yml'
pull_request: pull_request:
branches: [ "main" ] branches: [ "main" ]
paths: paths:
- 'scripts/images/datamate-python/**' - 'scripts/images/datamate-python/**'
- 'runtime/datamate-python/**'
- '.github/workflows/docker-image-backend-python.yml' - '.github/workflows/docker-image-backend-python.yml'
- '.github/workflows/docker-images-reusable.yml' - '.github/workflows/docker-images-reusable.yml'
workflow_dispatch: workflow_dispatch:

View File

@@ -10,6 +10,7 @@ services:
volumes: volumes:
- ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd
command: etcd -advertise-client-urls=http://etcd:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd command: etcd -advertise-client-urls=http://etcd:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
restart: always
networks: networks:
- datamate - datamate
healthcheck: healthcheck:

View File

@@ -1,4 +1,4 @@
import React, { useEffect } from "react"; import React, { useCallback, useEffect } from "react";
import { Button, Input, Table } from "antd"; import { Button, Input, Table } from "antd";
import { RightOutlined } from "@ant-design/icons"; import { RightOutlined } from "@ant-design/icons";
import { mapDataset } from "@/pages/DataManagement/dataset.const"; import { mapDataset } from "@/pages/DataManagement/dataset.const";
@@ -19,6 +19,7 @@ interface DatasetFileTransferProps
open: boolean; open: boolean;
selectedFilesMap: { [key: string]: DatasetFile }; selectedFilesMap: { [key: string]: DatasetFile };
onSelectedFilesChange: (filesMap: { [key: string]: DatasetFile }) => void; onSelectedFilesChange: (filesMap: { [key: string]: DatasetFile }) => void;
onDatasetSelect?: (dataset: Dataset | null) => void;
} }
const fileCols = [ const fileCols = [
@@ -48,6 +49,7 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
open, open,
selectedFilesMap, selectedFilesMap,
onSelectedFilesChange, onSelectedFilesChange,
onDatasetSelect,
...props ...props
}) => { }) => {
const [datasets, setDatasets] = React.useState<Dataset[]>([]); const [datasets, setDatasets] = React.useState<Dataset[]>([]);
@@ -96,7 +98,7 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
300 300
); );
const fetchFiles = async () => { const fetchFiles = useCallback(async () => {
if (!selectedDataset) return; if (!selectedDataset) return;
const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, { const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
page: filesPagination.current - 1, page: filesPagination.current - 1,
@@ -104,23 +106,25 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
keyword: filesSearch, keyword: filesSearch,
}); });
setFiles( setFiles(
data.content.map((item) => ({ (data.content || []).map((item: DatasetFile) => ({
...item, ...item,
key: item.id, key: item.id,
datasetName: selectedDataset.name, datasetName: selectedDataset.name,
})) || [] }))
); );
setFilesPagination((prev) => ({ setFilesPagination((prev) => ({
...prev, ...prev,
total: data.totalElements, total: data.totalElements,
})); }));
}; }, [filesPagination.current, filesPagination.pageSize, filesSearch, selectedDataset]);
useEffect(() => { useEffect(() => {
if (selectedDataset) { fetchFiles().catch(() => {});
fetchFiles(); }, [fetchFiles]);
}
}, [selectedDataset]); useEffect(() => {
onDatasetSelect?.(selectedDataset);
}, [selectedDataset, onDatasetSelect]);
const toggleSelectFile = (record: DatasetFile) => { const toggleSelectFile = (record: DatasetFile) => {
if (!selectedFilesMap[record.id]) { if (!selectedFilesMap[record.id]) {
@@ -147,8 +151,9 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
setShowFiles(false); setShowFiles(false);
setSelectedDataset(null); setSelectedDataset(null);
setDatasetSelections([]); setDatasetSelections([]);
onDatasetSelect?.(null);
} }
}, [open]); }, [open, onDatasetSelect]);
const datasetCols = [ const datasetCols = [
{ {
@@ -206,7 +211,15 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
})} })}
dataSource={datasets} dataSource={datasets}
columns={datasetCols} columns={datasetCols}
pagination={datasetPagination} pagination={{
...datasetPagination,
onChange: (page, pageSize) =>
setDatasetPagination({
current: page,
pageSize: pageSize || datasetPagination.pageSize,
total: datasetPagination.total,
}),
}}
/> />
</div> </div>
<RightOutlined /> <RightOutlined />
@@ -231,21 +244,11 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
})} })}
rowSelection={{ rowSelection={{
type: "checkbox", type: "checkbox",
onSelectAll: (selected, _, changeRows) => {
const newSelectedFiles = { ...selectedFilesMap };
if (selected) {
changeRows.forEach((row) => {
newSelectedFiles[row.id] = row;
});
} else {
changeRows.forEach((row) => {
delete newSelectedFiles[row.id];
});
}
onSelectedFilesChange(newSelectedFiles);
},
selectedRowKeys: Object.keys(selectedFilesMap), selectedRowKeys: Object.keys(selectedFilesMap),
onSelect: toggleSelectFile, onSelect: toggleSelectFile,
getCheckboxProps: (record: DatasetFile) => ({
name: record.fileName,
}),
}} }}
/> />
</div> </div>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,186 @@
import { useEffect, useState } from "react";
import { useParams, useNavigate } from "react-router";
import { Table, Badge, Button } from "antd";
import type { ColumnsType, TablePaginationConfig } from "antd/es/table";
import { querySynthesisFileTasksUsingGet, querySynthesisTaskByIdUsingGet } from "@/pages/SynthesisTask/synthesis-api";
import type { BadgeProps } from "antd";
import { formatDateTime } from "@/utils/unit";
/**
 * One row of the file-task table on this page.
 *
 * Keys are snake_case because the table columns read them directly through
 * `dataIndex` (e.g. `file_name`, `target_file_location`, `created_at`).
 */
interface SynthesisFileTaskItem {
// Used as the table's `rowKey`.
id: string;
synthesis_instance_id: string;
file_name: string;
source_file_id: string;
target_file_location: string;
// Rendered as a badge; "pending"/"processing", "completed" and "failed" get
// dedicated labels, any other value is shown verbatim.
status?: string;
// Rendered together as "processed_chunks/total_chunks" in the progress column.
total_chunks: number;
processed_chunks: number;
// Formatted with formatDateTime when present, otherwise shown as "-".
created_at?: string;
updated_at?: string;
}
/**
 * Page envelope read by `fetchData`: `content` becomes the table rows, while
 * `page`, `size` and `totalElements` are copied into the table pagination state.
 */
interface PagedResponse<T> {
content: T[];
totalElements: number;
totalPages: number;
page: number;
size: number;
}
/**
 * Subset of the parent synthesis task that the page header renders:
 * name, type tag, status tag, creation time and model id.
 */
interface SynthesisTaskInfo {
id: string;
name: string;
// "QA" and "COT" get dedicated labels in the header; other values are shown as-is.
synthesis_type: string;
// "pending", "completed" and "failed" get dedicated labels; other values are shown as-is.
status: string;
created_at: string;
model_id: string;
}
/** Header label for a synthesis type code; unknown codes are displayed unchanged. */
function synthesisTypeLabel(type: string): string {
  switch (type) {
    case "QA":
      return "问答对生成";
    case "COT":
      return "链式推理生成";
    default:
      return type;
  }
}

/** Header label for a task status code; unknown codes are displayed unchanged. */
function taskStatusLabel(status: string): string {
  switch (status) {
    case "pending":
      return "等待中";
    case "completed":
      return "已完成";
    case "failed":
      return "失败";
    default:
      return status;
  }
}

/**
 * Detail page of a single synthesis task.
 *
 * Reads the task id from the `:id` route parameter, shows a header with the
 * parent task's summary, and lists the task's per-file sub-tasks in a
 * server-paginated table.
 */
export default function SynthFileTask() {
  const { id: taskId = "" } = useParams();
  const navigate = useNavigate();

  const [loading, setLoading] = useState(false);
  const [data, setData] = useState<SynthesisFileTaskItem[]>([]);
  const [pagination, setPagination] = useState<TablePaginationConfig>({
    current: 1,
    pageSize: 10,
    total: 0,
  });
  const [taskInfo, setTaskInfo] = useState<SynthesisTaskInfo | null>(null);

  // Load the parent task's summary for the header.
  useEffect(() => {
    if (!taskId) return;
    // Guards against a late response overwriting state after taskId changed
    // or the component unmounted.
    let cancelled = false;
    querySynthesisTaskByIdUsingGet(taskId)
      .then((res) => {
        if (cancelled) return;
        // Accept both `{ data: task }` and `{ data: { data: task } }`, the same
        // way fetchData below unwraps its payload.
        setTaskInfo(res?.data?.data ?? res?.data ?? null);
      })
      .catch((err: unknown) => {
        if (cancelled) return;
        console.error("Failed to load synthesis task", err);
        setTaskInfo(null);
      });
    return () => {
      cancelled = true;
    };
  }, [taskId]);

  /**
   * Fetch one page of file tasks and sync rows + pagination state.
   * Failures are logged and leave the currently displayed rows untouched.
   */
  const fetchData = async (page = 1, pageSize = 10) => {
    if (!taskId) return;
    setLoading(true);
    try {
      const res = await querySynthesisFileTasksUsingGet(taskId, {
        page,
        page_size: pageSize,
      });
      const payload: PagedResponse<SynthesisFileTaskItem> =
        res?.data?.data ?? res?.data ?? {
          content: [],
          totalElements: 0,
          totalPages: 0,
          page,
          size: pageSize,
        };
      setData(payload.content || []);
      setPagination({
        current: payload.page ?? page,
        pageSize: payload.size ?? pageSize,
        total: payload.totalElements ?? payload.content?.length ?? 0,
      });
    } catch (err: unknown) {
      // Callers fire-and-forget this function, so a rejection must not escape.
      console.error("Failed to load synthesis file tasks", err);
    } finally {
      setLoading(false);
    }
  };

  // Reload from the first page whenever the route's task id changes.
  useEffect(() => {
    void fetchData(1, pagination.pageSize || 10);
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [taskId]);

  const handleTableChange = (pag: TablePaginationConfig) => {
    void fetchData(pag.current || 1, pag.pageSize || 10);
  };

  const columns: ColumnsType<SynthesisFileTaskItem> = [
    {
      title: "文件名",
      dataIndex: "file_name",
      key: "file_name",
    },
    {
      title: "状态",
      dataIndex: "status",
      key: "status",
      render: (status?: string) => {
        let badgeStatus: BadgeProps["status"] = "default";
        let text = status || "未知";
        if (status === "pending" || status === "processing") {
          badgeStatus = "processing";
          text = "处理中";
        } else if (status === "completed") {
          badgeStatus = "success";
          text = "已完成";
        } else if (status === "failed") {
          badgeStatus = "error";
          text = "失败";
        }
        return <Badge status={badgeStatus} text={text} />;
      },
    },
    {
      title: "切片进度",
      key: "chunks",
      render: (_text, record) => (
        <span>
          {record.processed_chunks}/{record.total_chunks}
        </span>
      ),
    },
    {
      title: "目标文件路径",
      dataIndex: "target_file_location",
      key: "target_file_location",
      ellipsis: true,
    },
    {
      title: "创建时间",
      dataIndex: "created_at",
      key: "created_at",
      render: (val?: string) => (val ? formatDateTime(val) : "-"),
    },
    {
      title: "更新时间",
      dataIndex: "updated_at",
      key: "updated_at",
      render: (val?: string) => (val ? formatDateTime(val) : "-"),
    },
  ];

  return (
    <div className="p-4 bg-white rounded-lg h-full flex flex-col">
      {/* Header: parent task summary and back button */}
      <div className="flex items-center justify-between mb-4">
        <div className="space-y-1">
          {taskInfo && (
            <>
              <div className="text-lg font-medium flex items-center gap-2">
                <span>{taskInfo.name}</span>
                <span className="text-xs px-2 py-0.5 rounded bg-blue-50 text-blue-700 border border-blue-200">
                  {synthesisTypeLabel(taskInfo.synthesis_type)}
                </span>
                <span className="text-xs px-2 py-0.5 rounded bg-gray-50 text-gray-700 border border-gray-200">
                  {taskStatusLabel(taskInfo.status)}
                </span>
              </div>
              <div className="text-xs text-gray-500 flex gap-4">
                <span>{formatDateTime(taskInfo.created_at)}</span>
                <span>ID{taskInfo.model_id}</span>
              </div>
            </>
          )}
        </div>
        {/* The button previously rendered without any label. */}
        <Button type="default" onClick={() => navigate("/data/synthesis/task")}>
          返回
        </Button>
      </div>
      {/* File-task table */}
      <Table<SynthesisFileTaskItem>
        rowKey="id"
        loading={loading}
        dataSource={data}
        columns={columns}
        pagination={pagination}
        onChange={handleTableChange}
      />
    </div>
  );
}

View File

@@ -1,112 +1,116 @@
import { useState } from "react"; import { useState, useEffect, ElementType } from "react";
import { Card, Button, Badge, Table, Progress } from "antd"; import { Card, Button, Badge, Table, Modal, message, Tooltip } from "antd";
import { import {
Plus, Plus,
Sparkles,
ArrowUp, ArrowUp,
ArrowDown, ArrowDown,
Pause, Pause,
Play, Play,
DownloadIcon,
CheckCircle, CheckCircle,
Check, Sparkles,
StopCircle,
} from "lucide-react"; } from "lucide-react";
import type { SynthesisTask } from "@/pages/SynthesisTask/synthesis"; import { DeleteOutlined, EyeOutlined } from "@ant-design/icons";
import { mockSynthesisTasks } from "@/mock/synthesis";
import { Link, useNavigate } from "react-router"; import { Link, useNavigate } from "react-router";
import { SearchControls } from "@/components/SearchControls"; import { SearchControls } from "@/components/SearchControls";
import { formatDateTime } from "@/utils/unit"; import { formatDateTime } from "@/utils/unit";
import {
querySynthesisTasksUsingGet,
deleteSynthesisTaskByIdUsingDelete,
} from "@/pages/SynthesisTask/synthesis-api";
interface SynthesisTask {
id: string;
name: string;
description?: string;
status: string;
synthesis_type: string;
model_id: string;
progress?: number;
result_data_location?: string;
text_split_config?: {
chunk_size: number;
chunk_overlap: number;
};
synthesis_config?: {
temperature?: number | null;
prompt_template?: string;
synthesis_count?: number | null;
};
source_file_id?: string[];
total_files?: number;
processed_files?: number;
total_chunks?: number;
processed_chunks?: number;
total_synthesis_data?: number;
created_at: string;
updated_at?: string;
created_by?: string;
updated_by?: string;
}
export default function SynthesisTaskTab() { export default function SynthesisTaskTab() {
const navigate = useNavigate(); const navigate = useNavigate();
const [searchQuery, setSearchQuery] = useState(""); const [searchQuery, setSearchQuery] = useState("");
const [tasks, setTasks] = useState<SynthesisTask[]>(mockSynthesisTasks); const [tasks, setTasks] = useState<SynthesisTask[]>([]);
const [filterStatus, setFilterStatus] = useState("all"); const [filterStatus, setFilterStatus] = useState("all");
const [sortBy, setSortBy] = useState<"createdAt" | "name">("createdAt"); const [sortBy, setSortBy] = useState<"createdAt" | "name">("createdAt");
const [sortOrder, setSortOrder] = useState<"asc" | "desc">("desc"); const [sortOrder, setSortOrder] = useState<"asc" | "desc">("desc");
const [page, setPage] = useState(1);
const [pageSize, setPageSize] = useState(10);
const [total, setTotal] = useState(0);
const [loading, setLoading] = useState(false);
// 过滤任务 // 获取任务列表
const filteredTasks = tasks.filter((task) => { const loadTasks = async () => {
const matchesSearch = setLoading(true);
task.name.toLowerCase().includes(searchQuery.toLowerCase()) || try {
task.template.toLowerCase().includes(searchQuery.toLowerCase()); const params = {
const matchesStatus = page: page,
filterStatus === "all" || task.status === filterStatus; page_size: pageSize,
return matchesSearch && matchesStatus; } as {
}); page?: number;
page_size?: number;
// 排序任务 synthesis_type?: string;
const sortedTasks = [...filteredTasks].sort((a, b) => { status?: string;
if (sortBy === "createdAt") { name?: string;
const dateA = new Date(a.createdAt).getTime(); };
const dateB = new Date(b.createdAt).getTime(); if (searchQuery) params.name = searchQuery;
return sortOrder === "asc" ? dateA - dateB : dateB - dateA; if (filterStatus !== "all") params.synthesis_type = filterStatus;
} else if (sortBy === "name") { const res = await querySynthesisTasksUsingGet(params);
return sortOrder === "asc" setTasks(res?.data?.content || []);
? a.name.localeCompare(b.name) setTotal(res?.data?.totalElements || 0);
: b.name.localeCompare(a.name); } catch {
setTasks([]);
setTotal(0);
} finally {
setLoading(false);
} }
return 0;
});
const handleTaskAction = (taskId: number, action: string) => {
setTasks((prev) =>
prev.map((task) => {
if (task.id === taskId) {
switch (action) {
case "pause":
return { ...task, status: "paused" as const };
case "resume":
return { ...task, status: "running" as const };
case "stop":
return {
...task,
status: "failed" as const,
progress: task.progress,
};
default:
return task;
}
}
return task;
})
);
}; };
useEffect(() => {
loadTasks();
// eslint-disable-next-line
}, [searchQuery, filterStatus, page, pageSize]);
// 状态徽章 // 状态徽章
const getStatusBadge = (status: string) => { const getStatusBadge = (status: string) => {
const statusConfig = { const statusConfig: Record<string, { label: string; color: string; icon: ElementType }> = {
pending: { pending: { label: "等待中", color: "#F59E0B", icon: Pause },
label: "等待中", running: { label: "运行中", color: "#3B82F6", icon: Play },
color: "#F59E0B", completed: { label: "已完成", color: "#10B981", icon: CheckCircle },
icon: Pause, failed: { label: "失败", color: "#EF4444", icon: Pause },
}, paused: { label: "已暂停", color: "#E5E7EB", icon: Pause },
running: {
label: "运行中",
color: "#3B82F6",
icon: Play,
},
completed: {
label: "已完成",
color: "#10B981",
icon: CheckCircle,
},
failed: {
label: "失败",
color: "#EF4444",
icon: Pause,
},
paused: {
label: "已暂停",
color: "#E5E7EB",
icon: Pause,
},
}; };
return ( return statusConfig[status] ?? statusConfig["pending"];
statusConfig[status as keyof typeof statusConfig] || statusConfig.pending
);
}; };
// 任务表格列 // 类型映射
const typeMap: Record<string, string> = {
QA: "问答对生成",
COT: "链式推理生成",
};
// 表格列
const taskColumns = [ const taskColumns = [
{ {
title: ( title: (
@@ -134,98 +138,77 @@ export default function SynthesisTaskTab() {
dataIndex: "name", dataIndex: "name",
key: "name", key: "name",
fixed: "left" as const, fixed: "left" as const,
render: (text: string, task: SynthesisTask) => ( render: (_: unknown, task: SynthesisTask) => (
<div className="flex items-center gap-3"> <div className="flex items-center gap-3">
<div className="w-8 h-8 bg-blue-500 rounded-lg flex items-center justify-center shadow-sm"> <div className="w-8 h-8 bg-blue-500 rounded-lg flex items-center justify-center shadow-sm">
{/* 可根据 type 渲染不同图标 */}
<span className="text-white font-bold text-base"> <span className="text-white font-bold text-base">
{task.type?.toUpperCase()?.slice(0, 1) || "T"} {task.synthesis_type?.toUpperCase()?.slice(0, 1) || "T"}
</span> </span>
</div> </div>
<div> <div>
<Link to={`/data/synthesis/task/${task.id}`}>{task.name}</Link> <Link to={`/data/synthesis/task/${task.id}`}>{task.name}</Link>
<div className="text-xs text-gray-500">{task.template}</div>
</div> </div>
</div> </div>
), ),
}, },
{ {
title: "类型", title: "类型",
dataIndex: "type", dataIndex: "synthesis_type",
key: "type", key: "synthesis_type",
render: (type: string) => type.toUpperCase(), render: (type: string) => typeMap[type] || type,
}, },
{ {
title: "状态", title: "文件数",
dataIndex: "status", dataIndex: "total_files",
key: "status", key: "total_files",
render: (status: string) => { render: (num: number, task: SynthesisTask) => <span>{num ?? (task.source_file_id?.length ?? 0)}</span>,
const statusConfig = getStatusBadge(status);
return <Badge color={statusConfig.color} text={statusConfig.label} />;
},
},
{
title: "进度",
dataIndex: "progress",
key: "progress",
width: 150,
render: (_: any, task: SynthesisTask) => (
<Progress percent={task.progress} size="small" />
),
},
{
title: "源数据集",
dataIndex: "sourceDataset",
key: "sourceDataset",
render: (text: string) => (
<div className="text-sm text-gray-900">{text}</div>
),
},
{
title: "生成数量",
dataIndex: "generatedCount",
key: "generatedCount",
render: (_: any, task: SynthesisTask) => (
<div className="text-sm font-medium text-gray-900">
{task.generatedCount?.toLocaleString?.()} /{" "}
{task.targetCount?.toLocaleString?.()}
</div>
),
},
{
title: "质量评分",
dataIndex: "quality",
key: "quality",
render: (quality: number) => (quality ? `${quality}%` : "-"),
}, },
{ {
title: "创建时间", title: "创建时间",
dataIndex: "createdAt", dataIndex: "created_at",
key: "createdAt", key: "created_at",
render: formatDateTime, render: (val: string) => formatDateTime(val),
}, },
{ {
title: "操作", title: "操作",
key: "actions", key: "actions",
fixed: "right" as const, fixed: "right" as const,
render: (_: any, task: SynthesisTask) => ( render: (_: unknown, task: SynthesisTask) => (
<div className="flex items-center justify-center gap-1"> <div className="flex items-center justify-center gap-1">
{task.status === "running" && ( <Tooltip title="查看详情">
<Button <Button
onClick={() => handleTaskAction(task.id, "pause")} onClick={() => navigate(`/data/synthesis/task/${task.id}`)}
className="hover:bg-orange-50 p-1 h-7 w-7" className="hover:bg-blue-50 p-1 h-7 w-7"
type="text" type="text"
icon={<Pause className="w-4 h-4" />} icon={<EyeOutlined />}
></Button> />
)} </Tooltip>
{task.status === "paused" && ( <Tooltip title="删除任务">
<Button <Button
onClick={() => handleTaskAction(task.id, "resume")} danger
className="hover:bg-green-50 p-1 h-7 w-7"
type="text" type="text"
icon={<Play className="w-4 h-4" />} className="hover:bg-red-50 p-1 h-7 w-7"
></Button> icon={<DeleteOutlined />}
)} onClick={() => {
Modal.confirm({
title: `确认删除任务?`,
content: `任务名:${task.name}`,
okText: "删除",
okType: "danger",
cancelText: "取消",
onOk: async () => {
try {
await deleteSynthesisTaskByIdUsingDelete(task.id);
message.success("删除成功");
loadTasks();
} catch {
message.error("删除失败");
}
},
});
}}
/>
</Tooltip>
</div> </div>
), ),
}, },
@@ -237,18 +220,15 @@ export default function SynthesisTaskTab() {
<SearchControls <SearchControls
searchTerm={searchQuery} searchTerm={searchQuery}
onSearchChange={setSearchQuery} onSearchChange={setSearchQuery}
searchPlaceholder="搜索任务名称或模板..." searchPlaceholder="搜索任务名称..."
filters={[ filters={[
{ {
key: "status", key: "status",
label: "状态", label: "类型",
options: [ options: [
{ label: "全部状态", value: "all" }, { label: "全部类型", value: "all" },
{ label: "等待中", value: "pending" }, { label: "问答对生成", value: "QA" },
{ label: "运行中", value: "running" }, { label: "链式推理生成", value: "COT" },
{ label: "已完成", value: "completed" },
{ label: "失败", value: "failed" },
{ label: "已暂停", value: "paused" },
], ],
}, },
]} ]}
@@ -259,13 +239,23 @@ export default function SynthesisTaskTab() {
showFilters showFilters
showViewToggle={false} showViewToggle={false}
/> />
{/* 任务表格 */} {/* 任务表格 */}
<Card> <Card>
<Table <Table
columns={taskColumns} columns={taskColumns}
dataSource={sortedTasks} dataSource={tasks}
rowKey="id" rowKey="id"
loading={loading}
pagination={{
current: page,
pageSize: pageSize,
total: total,
onChange: (p, ps) => {
setPage(p);
setPageSize(ps);
},
showSizeChanger: true,
}}
scroll={{ x: "max-content" }} scroll={{ x: "max-content" }}
locale={{ locale={{
emptyText: ( emptyText: (

View File

@@ -0,0 +1,37 @@
import { get, post, del } from "@/utils/request";
/**
 * Create a data-synthesis task.
 *
 * @param data - Request body forwarded unchanged to the shared `post` helper.
 * @returns The value returned by `post` for this request.
 */
export function createSynthesisTaskUsingPost(data: unknown) {
  const endpoint = "/api/synthesis/gen/task";
  return post(endpoint, data);
}
/**
 * Fetch the detail of one data-synthesis task.
 *
 * @param taskId - Task identifier; percent-encoded before being placed in the
 *   path so characters such as `/`, `?` or `#` cannot alter the request URL.
 * @returns The value returned by the shared `get` helper for this request.
 */
export function querySynthesisTaskByIdUsingGet(taskId: string) {
  return get(`/api/synthesis/gen/task/${encodeURIComponent(taskId)}`);
}
/**
 * Query a page of data-synthesis tasks.
 *
 * @param params - Optional paging (`page`, `page_size`) and filter
 *   (`synthesis_type`, `status`, `name`) values passed to the shared `get` helper.
 * @returns The value returned by `get` for this request.
 */
export function querySynthesisTasksUsingGet(params: {
  page?: number;
  page_size?: number;
  synthesis_type?: string;
  status?: string;
  name?: string;
}) {
  const endpoint = "/api/synthesis/gen/tasks";
  return get(endpoint, params as any);
}
/**
 * Delete an entire data-synthesis task.
 *
 * @param taskId - Task identifier; percent-encoded before being placed in the
 *   path so characters such as `/`, `?` or `#` cannot alter the request URL.
 * @returns The value returned by the shared `del` helper for this request.
 */
export function deleteSynthesisTaskByIdUsingDelete(taskId: string) {
  return del(`/api/synthesis/gen/task/${encodeURIComponent(taskId)}`);
}
/**
 * Query a page of file tasks belonging to one data-synthesis task.
 *
 * @param taskId - Parent task identifier; percent-encoded before being placed
 *   in the path so characters such as `/`, `?` or `#` cannot alter the request URL.
 * @param params - Optional `page` / `page_size` values passed to the shared `get` helper.
 * @returns The value returned by `get` for this request.
 */
export function querySynthesisFileTasksUsingGet(taskId: string, params: { page?: number; page_size?: number }) {
  return get(`/api/synthesis/gen/task/${encodeURIComponent(taskId)}/files`, params as any);
}
/**
 * Fetch the prompt associated with a synthesis type.
 *
 * @param synthType - Synthesis type, sent as the `synth_type` parameter.
 * @returns The value returned by the shared `get` helper for this request.
 */
export function getPromptByTypeUsingGet(synthType: string) {
  const endpoint = "/api/synthesis/gen/prompt";
  const query = { synth_type: synthType };
  return get(endpoint, query as any);
}

View File

@@ -40,6 +40,7 @@ import { withErrorBoundary } from "@/components/ErrorBoundary";
import AgentPage from "@/pages/Agent/Agent.tsx"; import AgentPage from "@/pages/Agent/Agent.tsx";
import RatioTaskDetail from "@/pages/RatioTask/Detail/RatioTaskDetail"; import RatioTaskDetail from "@/pages/RatioTask/Detail/RatioTaskDetail";
import CleansingTemplateDetail from "@/pages/DataCleansing/Detail/TemplateDetail"; import CleansingTemplateDetail from "@/pages/DataCleansing/Detail/TemplateDetail";
import SynthFileTask from "@/pages/SynthesisTask/SynthFileTask.tsx";
import EvaluationDetailPage from "@/pages/DataEvaluation/Detail/TaskDetail.tsx"; import EvaluationDetailPage from "@/pages/DataEvaluation/Detail/TaskDetail.tsx";
const router = createBrowserRouter([ const router = createBrowserRouter([
@@ -160,6 +161,7 @@ const router = createBrowserRouter([
path: "create", path: "create",
Component: SynthesisTaskCreate, Component: SynthesisTaskCreate,
}, },
{path: ":id", Component: SynthFileTask},
], ],
}, },
{ {

View File

@@ -1,8 +1,8 @@
from fastapi import APIRouter from fastapi import APIRouter
router = APIRouter( router = APIRouter(
prefix="/synth", prefix="/synthesis",
tags = ["synth"] tags = ["synthesis"]
) )
# Include sub-routers # Include sub-routers

View File

@@ -18,7 +18,14 @@ from app.db.session import get_db
from app.module.generation.schema.generation import ( from app.module.generation.schema.generation import (
CreateSynthesisTaskRequest, CreateSynthesisTaskRequest,
DataSynthesisTaskItem, DataSynthesisTaskItem,
PagedDataSynthesisTaskResponse, SynthesisType) PagedDataSynthesisTaskResponse,
SynthesisType,
DataSynthesisFileTaskItem,
PagedDataSynthesisFileTaskResponse,
DataSynthesisChunkItem,
PagedDataSynthesisChunkResponse,
SynthesisDataItem,
)
from app.module.generation.service.generation_service import GenerationService from app.module.generation.service.generation_service import GenerationService
from app.module.generation.service.prompt import get_prompt from app.module.generation.service.prompt import get_prompt
from app.module.shared.schema import StandardResponse from app.module.shared.schema import StandardResponse
@@ -219,19 +226,26 @@ async def delete_synthesis_task(
data=None, data=None,
) )
@router.delete("/task/{task_id}/{file_id}", response_model=StandardResponse[None]) @router.delete("/task/{task_id}/{file_id}", response_model=StandardResponse[None])
async def delete_synthesis_file_task( async def delete_synthesis_file_task(
task_id: str, task_id: str,
file_id: str, file_id: str,
db: AsyncSession = Depends(get_db) db: AsyncSession = Depends(get_db)
): ):
"""删除数据合成任务中的文件任务""" """删除数据合成任务中的文件任务,同时刷新任务表中的文件/切片数量"""
# 先获取任务和文件任务记录
task = await db.get(DataSynthesisInstance, task_id)
if not task:
raise HTTPException(status_code=404, detail="Synthesis task not found")
file_task = await db.get(DataSynthesisFileInstance, file_id) file_task = await db.get(DataSynthesisFileInstance, file_id)
if not file_task: if not file_task:
raise HTTPException(status_code=404, detail="Synthesis file task not found") raise HTTPException(status_code=404, detail="Synthesis file task not found")
# 删除 SynthesisData(根据文件任务ID) # 删除 SynthesisData(根据文件任务ID)
await db.execute(delete(SynthesisData).where( await db.execute(
delete(SynthesisData).where(
SynthesisData.synthesis_file_instance_id == file_id SynthesisData.synthesis_file_instance_id == file_id
) )
) )
@@ -243,11 +257,28 @@ async def delete_synthesis_file_task(
) )
# 删除文件任务记录 # 删除文件任务记录
await db.execute(delete(DataSynthesisFileInstance).where( await db.execute(
delete(DataSynthesisFileInstance).where(
DataSynthesisFileInstance.id == file_id DataSynthesisFileInstance.id == file_id
) )
) )
# 刷新任务级别统计字段:总文件数、总文本块数、已处理文本块数
if task.total_files and task.total_files > 0:
task.total_files -= 1
if task.total_files < 0:
task.total_files = 0
await db.commit()
await db.refresh(task)
return StandardResponse(
code=200,
message="success",
data=None,
)
@router.get("/prompt", response_model=StandardResponse[str]) @router.get("/prompt", response_model=StandardResponse[str])
async def get_prompt_by_type( async def get_prompt_by_type(
synth_type: SynthesisType, synth_type: SynthesisType,
@@ -258,3 +289,157 @@ async def get_prompt_by_type(
message="Success", message="Success",
data=prompt, data=prompt,
) )
@router.get("/task/{task_id}/files", response_model=StandardResponse[PagedDataSynthesisFileTaskResponse])
async def list_synthesis_file_tasks(
    task_id: str,
    page: int = 1,
    page_size: int = 10,
    db: AsyncSession = Depends(get_db),
):
    """Return one page of the file tasks that belong to a synthesis task.

    Args:
        task_id: ID of the parent synthesis task.
        page: 1-based page number; values below 1 are treated as 1.
        page_size: Rows per page; values below 1 fall back to 10.
        db: Database session injected by FastAPI.

    Returns:
        StandardResponse wrapping a PagedDataSynthesisFileTaskResponse.

    Raises:
        HTTPException: 404 when no synthesis task with ``task_id`` exists.
    """
    # Validate that the parent task exists before querying its files.
    task = await db.get(DataSynthesisInstance, task_id)
    if not task:
        raise HTTPException(status_code=404, detail="Synthesis task not found")

    base_query = select(DataSynthesisFileInstance).where(
        DataSynthesisFileInstance.synthesis_instance_id == task_id
    )

    count_q = select(func.count()).select_from(base_query.subquery())
    total = (await db.execute(count_q)).scalar_one()

    if page < 1:
        page = 1
    if page_size < 1:
        page_size = 10

    # OFFSET/LIMIT without ORDER BY lets the database return rows in any
    # order, so consecutive pages could repeat or skip rows. Order by creation
    # time and use the primary key as a tiebreaker to make paging stable.
    result = await db.execute(
        base_query.order_by(
            DataSynthesisFileInstance.created_at.asc(),
            DataSynthesisFileInstance.id.asc(),
        )
        .offset((page - 1) * page_size)
        .limit(page_size)
    )
    rows = result.scalars().all()

    file_items = [
        DataSynthesisFileTaskItem(
            id=row.id,
            synthesis_instance_id=row.synthesis_instance_id,
            file_name=row.file_name,
            source_file_id=row.source_file_id,
            target_file_location=row.target_file_location,
            status=row.status,
            total_chunks=row.total_chunks,
            processed_chunks=row.processed_chunks,
            created_at=row.created_at,
            updated_at=row.updated_at,
            created_by=row.created_by,
            updated_by=row.updated_by,
        )
        for row in rows
    ]

    paged = PagedDataSynthesisFileTaskResponse(
        content=file_items,
        totalElements=total,
        # Ceiling division: a partially filled last page still counts.
        totalPages=(total + page_size - 1) // page_size,
        page=page,
        size=page_size,
    )

    return StandardResponse(
        code=200,
        message="Success",
        data=paged,
    )
@router.get("/file/{file_id}/chunks", response_model=StandardResponse[PagedDataSynthesisChunkResponse])
async def list_chunks_by_file(
    file_id: str,
    page: int = 1,
    page_size: int = 10,
    db: AsyncSession = Depends(get_db),
):
    """Return one page of chunk records for a file task, ordered by chunk index.

    Responds with 404 when the file task does not exist. ``page`` values below
    1 are treated as 1 and ``page_size`` values below 1 fall back to 10.
    """
    # The file task must exist before its chunks are listed.
    if not await db.get(DataSynthesisFileInstance, file_id):
        raise HTTPException(status_code=404, detail="Synthesis file task not found")

    chunks_of_file = select(DataSynthesisChunkInstance).where(
        DataSynthesisChunkInstance.synthesis_file_instance_id == file_id
    )

    total_stmt = select(func.count()).select_from(chunks_of_file.subquery())
    total = (await db.execute(total_stmt)).scalar_one()

    # Normalise out-of-range paging arguments.
    page = max(page, 1)
    page_size = page_size if page_size >= 1 else 10

    page_stmt = (
        chunks_of_file.order_by(DataSynthesisChunkInstance.chunk_index.asc())
        .offset((page - 1) * page_size)
        .limit(page_size)
    )
    records = (await db.execute(page_stmt)).scalars().all()

    chunk_items = []
    for record in records:
        chunk_items.append(
            DataSynthesisChunkItem(
                id=record.id,
                synthesis_file_instance_id=record.synthesis_file_instance_id,
                chunk_index=record.chunk_index,
                chunk_content=record.chunk_content,
                chunk_metadata=getattr(record, "chunk_metadata", None),
            )
        )

    # Ceiling division so a partially filled last page is counted.
    page_count = (total + page_size - 1) // page_size

    return StandardResponse(
        code=200,
        message="Success",
        data=PagedDataSynthesisChunkResponse(
            content=chunk_items,
            totalElements=total,
            totalPages=page_count,
            page=page,
            size=page_size,
        ),
    )
@router.get("/chunk/{chunk_id}/data", response_model=StandardResponse[list[SynthesisDataItem]])
async def list_synthesis_data_by_chunk(
    chunk_id: str,
    db: AsyncSession = Depends(get_db),
):
    """Return every synthesis result row produced for the given chunk.

    Responds with 404 when no chunk with ``chunk_id`` exists.
    """
    # Reject unknown chunk IDs up front.
    if not await db.get(DataSynthesisChunkInstance, chunk_id):
        raise HTTPException(status_code=404, detail="Chunk not found")

    stmt = select(SynthesisData).where(SynthesisData.chunk_instance_id == chunk_id)
    records = (await db.execute(stmt)).scalars().all()

    items = []
    for record in records:
        items.append(
            SynthesisDataItem(
                id=record.id,
                data=record.data,
                synthesis_file_instance_id=record.synthesis_file_instance_id,
                chunk_instance_id=record.chunk_instance_id,
            )
        )

    return StandardResponse(code=200, message="Success", data=items)

View File

@@ -70,6 +70,67 @@ class PagedDataSynthesisTaskResponse(BaseModel):
page: int page: int
size: int size: int
class DataSynthesisFileTaskItem(BaseModel):
    """A file task that belongs to a data-synthesis task."""
    id: str
    # ID of the parent synthesis task.
    synthesis_instance_id: str
    file_name: str
    source_file_id: str
    target_file_location: str
    status: Optional[str] = None
    total_chunks: int
    processed_chunks: int
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    created_by: Optional[str] = None
    updated_by: Optional[str] = None

    class Config:
        # NOTE(review): `orm_mode` is the Pydantic v1 spelling (v2 renames it
        # to `from_attributes`) — confirm which version the project pins.
        orm_mode = True
class PagedDataSynthesisFileTaskResponse(BaseModel):
    """One page of file tasks for a data-synthesis task."""
    # Items on the current page.
    content: List[DataSynthesisFileTaskItem]
    # Total number of matching rows across all pages.
    totalElements: int
    totalPages: int
    page: int
    size: int
class DataSynthesisChunkItem(BaseModel):
    """A chunk record that belongs to a synthesis file task."""
    id: str
    # ID of the file task this chunk was produced from.
    synthesis_file_instance_id: str
    chunk_index: Optional[int] = None
    chunk_content: Optional[str] = None
    chunk_metadata: Optional[Dict[str, Any]] = None

    class Config:
        # NOTE(review): `orm_mode` is the Pydantic v1 spelling (v2 renames it
        # to `from_attributes`) — confirm which version the project pins.
        orm_mode = True
class PagedDataSynthesisChunkResponse(BaseModel):
    """One page of chunk records."""
    # Items on the current page.
    content: List[DataSynthesisChunkItem]
    # Total number of matching rows across all pages.
    totalElements: int
    totalPages: int
    page: int
    size: int
class SynthesisDataItem(BaseModel):
    """A single synthesis result row."""
    id: str
    data: Optional[Dict[str, Any]] = None
    # IDs of the file task and the chunk this result belongs to.
    synthesis_file_instance_id: str
    chunk_instance_id: str

    class Config:
        # NOTE(review): `orm_mode` is the Pydantic v1 spelling (v2 renames it
        # to `from_attributes`) — confirm which version the project pins.
        orm_mode = True
class ChatRequest(BaseModel): class ChatRequest(BaseModel):
"""聊天请求参数""" """聊天请求参数"""
model_id: str model_id: str

View File

@@ -168,11 +168,11 @@ class GenerationService:
self.db.add(chunk_record) self.db.add(chunk_record)
# 更新文件任务的分块数量 # 更新文件任务的分块数量
file_task.chunk_count = len(chunks) file_task.total_chunks = len(chunks)
file_task.status = "processing" file_task.status = "processing"
await self.db.refresh(file_task)
await self.db.commit() await self.db.commit()
await self.db.refresh(file_task)
async def _invoke_llm_for_chunks( async def _invoke_llm_for_chunks(
self, self,

View File

@@ -1,7 +1,6 @@
from app.module.generation.schema.generation import SynthesisType from app.module.generation.schema.generation import SynthesisType
QA_PROMPT=""" QA_PROMPT="""# 角色
# 角色
你是一位专业的AI助手,擅长从给定的文本中提取关键信息并创建用于教学和测试的问答对。 你是一位专业的AI助手,擅长从给定的文本中提取关键信息并创建用于教学和测试的问答对。
# 任务 # 任务
@@ -11,7 +10,7 @@ QA_PROMPT="""
{document} {document}
# 要求与指令 # 要求与指令
1. **问题类型**:生成{synthesis_count - 1}-{synthesis_count + 1}个问答对。问题类型应多样化,包括但不限于: 1. **问题类型**:生成 {synthesis_count} 个左右的问答对。问题类型应多样化,包括但不限于:
* **事实性**:基于文本中明确提到的事实。 * **事实性**:基于文本中明确提到的事实。
* **理解性**:需要理解上下文和概念。 * **理解性**:需要理解上下文和概念。
* **归纳性**:需要总结或归纳多个信息点。 * **归纳性**:需要总结或归纳多个信息点。
@@ -30,8 +29,7 @@ QA_PROMPT="""
""" """
COT_PROMPT=""" COT_PROMPT="""# 角色
# 角色
你是一位专业的数据合成专家,擅长基于给定的原始文档和 COT(Chain of Thought,思维链)逻辑,生成高质量、符合实际应用场景的 COT 数据。COT 数据需包含清晰的问题、逐步推理过程和最终结论,能完整还原解决问题的思考路径。 你是一位专业的数据合成专家,擅长基于给定的原始文档和 COT(Chain of Thought,思维链)逻辑,生成高质量、符合实际应用场景的 COT 数据。COT 数据需包含清晰的问题、逐步推理过程和最终结论,能完整还原解决问题的思考路径。
# 任务 # 任务
@@ -41,7 +39,7 @@ COT_PROMPT="""
{document} {document}
# 要求与指令 # 要求与指令
1. **数量要求**:生成 {min\_count}-{max\_count} 条 COT 数据(min\_count={synthesis\_count-1},max\_count={synthesis\_count+1}) 1. **数量要求**:生成 {synthesis_count} 条左右的 COT 数据
2. **内容要求**: 2. **内容要求**:
* 每条 COT 数据需包含 “问题”“思维链推理”“最终结论” 三部分,逻辑闭环,推理步骤清晰、连贯,不跳跃关键环节。 * 每条 COT 数据需包含 “问题”“思维链推理”“最终结论” 三部分,逻辑闭环,推理步骤清晰、连贯,不跳跃关键环节。
* 问题需基于文档中的事实信息、概念关联或逻辑疑问,是读完文档后自然产生的有价值问题(避免无意义或过于简单的问题)。 * 问题需基于文档中的事实信息、概念关联或逻辑疑问,是读完文档后自然产生的有价值问题(避免无意义或过于简单的问题)。