You've already forked DataMate
feat: enhance backend deployment, frontend file selection and synthesis task management (#129)
* feat: Implement data synthesis task management with database models and API endpoints * feat: Update Python version requirements and refine dependency constraints in configuration * fix: Correctly extract file values from selectedFilesMap in AddDataDialog * feat: Refactor synthesis task routes and enhance file task management in the API * feat: Enhance SynthesisTaskTab with tooltip actions and add chunk data retrieval in API
This commit is contained in:
@@ -5,12 +5,14 @@ on:
|
|||||||
branches: [ "main" ]
|
branches: [ "main" ]
|
||||||
paths:
|
paths:
|
||||||
- 'scripts/images/datamate-python/**'
|
- 'scripts/images/datamate-python/**'
|
||||||
|
- 'runtime/datamate-python/**'
|
||||||
- '.github/workflows/docker-image-backend-python.yml'
|
- '.github/workflows/docker-image-backend-python.yml'
|
||||||
- '.github/workflows/docker-images-reusable.yml'
|
- '.github/workflows/docker-images-reusable.yml'
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [ "main" ]
|
branches: [ "main" ]
|
||||||
paths:
|
paths:
|
||||||
- 'scripts/images/datamate-python/**'
|
- 'scripts/images/datamate-python/**'
|
||||||
|
- 'runtime/datamate-python/**'
|
||||||
- '.github/workflows/docker-image-backend-python.yml'
|
- '.github/workflows/docker-image-backend-python.yml'
|
||||||
- '.github/workflows/docker-images-reusable.yml'
|
- '.github/workflows/docker-images-reusable.yml'
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd
|
- ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd
|
||||||
command: etcd -advertise-client-urls=http://etcd:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
|
command: etcd -advertise-client-urls=http://etcd:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
|
||||||
|
restart: always
|
||||||
networks:
|
networks:
|
||||||
- datamate
|
- datamate
|
||||||
healthcheck:
|
healthcheck:
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import React, { useEffect } from "react";
|
import React, { useCallback, useEffect } from "react";
|
||||||
import { Button, Input, Table } from "antd";
|
import { Button, Input, Table } from "antd";
|
||||||
import { RightOutlined } from "@ant-design/icons";
|
import { RightOutlined } from "@ant-design/icons";
|
||||||
import { mapDataset } from "@/pages/DataManagement/dataset.const";
|
import { mapDataset } from "@/pages/DataManagement/dataset.const";
|
||||||
@@ -19,6 +19,7 @@ interface DatasetFileTransferProps
|
|||||||
open: boolean;
|
open: boolean;
|
||||||
selectedFilesMap: { [key: string]: DatasetFile };
|
selectedFilesMap: { [key: string]: DatasetFile };
|
||||||
onSelectedFilesChange: (filesMap: { [key: string]: DatasetFile }) => void;
|
onSelectedFilesChange: (filesMap: { [key: string]: DatasetFile }) => void;
|
||||||
|
onDatasetSelect?: (dataset: Dataset | null) => void;
|
||||||
}
|
}
|
||||||
|
|
||||||
const fileCols = [
|
const fileCols = [
|
||||||
@@ -48,6 +49,7 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
|
|||||||
open,
|
open,
|
||||||
selectedFilesMap,
|
selectedFilesMap,
|
||||||
onSelectedFilesChange,
|
onSelectedFilesChange,
|
||||||
|
onDatasetSelect,
|
||||||
...props
|
...props
|
||||||
}) => {
|
}) => {
|
||||||
const [datasets, setDatasets] = React.useState<Dataset[]>([]);
|
const [datasets, setDatasets] = React.useState<Dataset[]>([]);
|
||||||
@@ -96,7 +98,7 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
|
|||||||
300
|
300
|
||||||
);
|
);
|
||||||
|
|
||||||
const fetchFiles = async () => {
|
const fetchFiles = useCallback(async () => {
|
||||||
if (!selectedDataset) return;
|
if (!selectedDataset) return;
|
||||||
const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
|
const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
|
||||||
page: filesPagination.current - 1,
|
page: filesPagination.current - 1,
|
||||||
@@ -104,23 +106,25 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
|
|||||||
keyword: filesSearch,
|
keyword: filesSearch,
|
||||||
});
|
});
|
||||||
setFiles(
|
setFiles(
|
||||||
data.content.map((item) => ({
|
(data.content || []).map((item: DatasetFile) => ({
|
||||||
...item,
|
...item,
|
||||||
key: item.id,
|
key: item.id,
|
||||||
datasetName: selectedDataset.name,
|
datasetName: selectedDataset.name,
|
||||||
})) || []
|
}))
|
||||||
);
|
);
|
||||||
setFilesPagination((prev) => ({
|
setFilesPagination((prev) => ({
|
||||||
...prev,
|
...prev,
|
||||||
total: data.totalElements,
|
total: data.totalElements,
|
||||||
}));
|
}));
|
||||||
};
|
}, [filesPagination.current, filesPagination.pageSize, filesSearch, selectedDataset]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (selectedDataset) {
|
fetchFiles().catch(() => {});
|
||||||
fetchFiles();
|
}, [fetchFiles]);
|
||||||
}
|
|
||||||
}, [selectedDataset]);
|
useEffect(() => {
|
||||||
|
onDatasetSelect?.(selectedDataset);
|
||||||
|
}, [selectedDataset, onDatasetSelect]);
|
||||||
|
|
||||||
const toggleSelectFile = (record: DatasetFile) => {
|
const toggleSelectFile = (record: DatasetFile) => {
|
||||||
if (!selectedFilesMap[record.id]) {
|
if (!selectedFilesMap[record.id]) {
|
||||||
@@ -147,8 +151,9 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
|
|||||||
setShowFiles(false);
|
setShowFiles(false);
|
||||||
setSelectedDataset(null);
|
setSelectedDataset(null);
|
||||||
setDatasetSelections([]);
|
setDatasetSelections([]);
|
||||||
|
onDatasetSelect?.(null);
|
||||||
}
|
}
|
||||||
}, [open]);
|
}, [open, onDatasetSelect]);
|
||||||
|
|
||||||
const datasetCols = [
|
const datasetCols = [
|
||||||
{
|
{
|
||||||
@@ -206,7 +211,15 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
|
|||||||
})}
|
})}
|
||||||
dataSource={datasets}
|
dataSource={datasets}
|
||||||
columns={datasetCols}
|
columns={datasetCols}
|
||||||
pagination={datasetPagination}
|
pagination={{
|
||||||
|
...datasetPagination,
|
||||||
|
onChange: (page, pageSize) =>
|
||||||
|
setDatasetPagination({
|
||||||
|
current: page,
|
||||||
|
pageSize: pageSize || datasetPagination.pageSize,
|
||||||
|
total: datasetPagination.total,
|
||||||
|
}),
|
||||||
|
}}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
<RightOutlined />
|
<RightOutlined />
|
||||||
@@ -231,21 +244,11 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
|
|||||||
})}
|
})}
|
||||||
rowSelection={{
|
rowSelection={{
|
||||||
type: "checkbox",
|
type: "checkbox",
|
||||||
onSelectAll: (selected, _, changeRows) => {
|
|
||||||
const newSelectedFiles = { ...selectedFilesMap };
|
|
||||||
if (selected) {
|
|
||||||
changeRows.forEach((row) => {
|
|
||||||
newSelectedFiles[row.id] = row;
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
changeRows.forEach((row) => {
|
|
||||||
delete newSelectedFiles[row.id];
|
|
||||||
});
|
|
||||||
}
|
|
||||||
onSelectedFilesChange(newSelectedFiles);
|
|
||||||
},
|
|
||||||
selectedRowKeys: Object.keys(selectedFilesMap),
|
selectedRowKeys: Object.keys(selectedFilesMap),
|
||||||
onSelect: toggleSelectFile,
|
onSelect: toggleSelectFile,
|
||||||
|
getCheckboxProps: (record: DatasetFile) => ({
|
||||||
|
name: record.fileName,
|
||||||
|
}),
|
||||||
}}
|
}}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
186
frontend/src/pages/SynthesisTask/SynthFileTask.tsx
Normal file
186
frontend/src/pages/SynthesisTask/SynthFileTask.tsx
Normal file
@@ -0,0 +1,186 @@
|
|||||||
|
import { useEffect, useState } from "react";
|
||||||
|
import { useParams, useNavigate } from "react-router";
|
||||||
|
import { Table, Badge, Button } from "antd";
|
||||||
|
import type { ColumnsType, TablePaginationConfig } from "antd/es/table";
|
||||||
|
import { querySynthesisFileTasksUsingGet, querySynthesisTaskByIdUsingGet } from "@/pages/SynthesisTask/synthesis-api";
|
||||||
|
import type { BadgeProps } from "antd";
|
||||||
|
import { formatDateTime } from "@/utils/unit";
|
||||||
|
|
||||||
|
interface SynthesisFileTaskItem {
|
||||||
|
id: string;
|
||||||
|
synthesis_instance_id: string;
|
||||||
|
file_name: string;
|
||||||
|
source_file_id: string;
|
||||||
|
target_file_location: string;
|
||||||
|
status?: string;
|
||||||
|
total_chunks: number;
|
||||||
|
processed_chunks: number;
|
||||||
|
created_at?: string;
|
||||||
|
updated_at?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface PagedResponse<T> {
|
||||||
|
content: T[];
|
||||||
|
totalElements: number;
|
||||||
|
totalPages: number;
|
||||||
|
page: number;
|
||||||
|
size: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface SynthesisTaskInfo {
|
||||||
|
id: string;
|
||||||
|
name: string;
|
||||||
|
synthesis_type: string;
|
||||||
|
status: string;
|
||||||
|
created_at: string;
|
||||||
|
model_id: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export default function SynthFileTask() {
|
||||||
|
const { id: taskId = "" } = useParams();
|
||||||
|
const navigate = useNavigate();
|
||||||
|
const [loading, setLoading] = useState(false);
|
||||||
|
const [data, setData] = useState<SynthesisFileTaskItem[]>([]);
|
||||||
|
const [pagination, setPagination] = useState<TablePaginationConfig>({
|
||||||
|
current: 1,
|
||||||
|
pageSize: 10,
|
||||||
|
total: 0,
|
||||||
|
});
|
||||||
|
const [taskInfo, setTaskInfo] = useState<SynthesisTaskInfo | null>(null);
|
||||||
|
|
||||||
|
// 查询总任务详情
|
||||||
|
useEffect(() => {
|
||||||
|
if (!taskId) return;
|
||||||
|
querySynthesisTaskByIdUsingGet(taskId).then((res) => {
|
||||||
|
setTaskInfo(res?.data?.data || null);
|
||||||
|
});
|
||||||
|
}, [taskId]);
|
||||||
|
|
||||||
|
const fetchData = async (page = 1, pageSize = 10) => {
|
||||||
|
if (!taskId) return;
|
||||||
|
setLoading(true);
|
||||||
|
try {
|
||||||
|
const res = await querySynthesisFileTasksUsingGet(taskId, {
|
||||||
|
page,
|
||||||
|
page_size: pageSize,
|
||||||
|
});
|
||||||
|
const payload: PagedResponse<SynthesisFileTaskItem> =
|
||||||
|
res?.data?.data ?? res?.data ?? {
|
||||||
|
content: [],
|
||||||
|
totalElements: 0,
|
||||||
|
totalPages: 0,
|
||||||
|
page,
|
||||||
|
size: pageSize,
|
||||||
|
};
|
||||||
|
setData(payload.content || []);
|
||||||
|
setPagination({
|
||||||
|
current: payload.page ?? page,
|
||||||
|
pageSize: payload.size ?? pageSize,
|
||||||
|
total: payload.totalElements ?? payload.content?.length ?? 0,
|
||||||
|
});
|
||||||
|
} finally {
|
||||||
|
setLoading(false);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
fetchData(1, pagination.pageSize || 10);
|
||||||
|
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||||
|
}, [taskId]);
|
||||||
|
|
||||||
|
const handleTableChange = (pag: TablePaginationConfig) => {
|
||||||
|
fetchData(pag.current || 1, pag.pageSize || 10);
|
||||||
|
};
|
||||||
|
|
||||||
|
const columns: ColumnsType<SynthesisFileTaskItem> = [
|
||||||
|
{
|
||||||
|
title: "文件名",
|
||||||
|
dataIndex: "file_name",
|
||||||
|
key: "file_name",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
title: "状态",
|
||||||
|
dataIndex: "status",
|
||||||
|
key: "status",
|
||||||
|
render: (status?: string) => {
|
||||||
|
let badgeStatus: BadgeProps["status"] = "default";
|
||||||
|
let text = status || "未知";
|
||||||
|
if (status === "pending" || status === "processing") {
|
||||||
|
badgeStatus = "processing";
|
||||||
|
text = "处理中";
|
||||||
|
} else if (status === "completed") {
|
||||||
|
badgeStatus = "success";
|
||||||
|
text = "已完成";
|
||||||
|
} else if (status === "failed") {
|
||||||
|
badgeStatus = "error";
|
||||||
|
text = "失败";
|
||||||
|
}
|
||||||
|
return <Badge status={badgeStatus} text={text} />;
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
title: "切片进度",
|
||||||
|
key: "chunks",
|
||||||
|
render: (_text, record) => (
|
||||||
|
<span>
|
||||||
|
{record.processed_chunks}/{record.total_chunks}
|
||||||
|
</span>
|
||||||
|
),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
title: "目标文件路径",
|
||||||
|
dataIndex: "target_file_location",
|
||||||
|
key: "target_file_location",
|
||||||
|
ellipsis: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
title: "创建时间",
|
||||||
|
dataIndex: "created_at",
|
||||||
|
key: "created_at",
|
||||||
|
render: (val?: string) => (val ? formatDateTime(val) : "-"),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
title: "更新时间",
|
||||||
|
dataIndex: "updated_at",
|
||||||
|
key: "updated_at",
|
||||||
|
render: (val?: string) => (val ? formatDateTime(val) : "-"),
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="p-4 bg-white rounded-lg h-full flex flex-col">
|
||||||
|
{/* 顶部任务信息和返回按钮 */}
|
||||||
|
<div className="flex items-center justify-between mb-4">
|
||||||
|
<div className="space-y-1">
|
||||||
|
{taskInfo && (
|
||||||
|
<>
|
||||||
|
<div className="text-lg font-medium flex items-center gap-2">
|
||||||
|
<span>{taskInfo.name}</span>
|
||||||
|
<span className="text-xs px-2 py-0.5 rounded bg-blue-50 text-blue-700 border border-blue-200">
|
||||||
|
{taskInfo.synthesis_type === "QA" ? "问答对生成" : taskInfo.synthesis_type === "COT" ? "链式推理生成" : taskInfo.synthesis_type}
|
||||||
|
</span>
|
||||||
|
<span className="text-xs px-2 py-0.5 rounded bg-gray-50 text-gray-700 border border-gray-200">
|
||||||
|
状态:{taskInfo.status === "pending" ? "等待中" : taskInfo.status === "completed" ? "已完成" : taskInfo.status === "failed" ? "失败" : taskInfo.status}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<div className="text-xs text-gray-500 flex gap-4">
|
||||||
|
<span>创建时间:{formatDateTime(taskInfo.created_at)}</span>
|
||||||
|
<span>模型ID:{taskInfo.model_id}</span>
|
||||||
|
</div>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<Button type="default" onClick={() => navigate("/data/synthesis/task")}>返回任务首页</Button>
|
||||||
|
</div>
|
||||||
|
{/* 文件任务表格 */}
|
||||||
|
<Table<SynthesisFileTaskItem>
|
||||||
|
rowKey="id"
|
||||||
|
loading={loading}
|
||||||
|
dataSource={data}
|
||||||
|
columns={columns}
|
||||||
|
pagination={pagination}
|
||||||
|
onChange={handleTableChange}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
@@ -1,112 +1,116 @@
|
|||||||
import { useState } from "react";
|
import { useState, useEffect, ElementType } from "react";
|
||||||
import { Card, Button, Badge, Table, Progress } from "antd";
|
import { Card, Button, Badge, Table, Modal, message, Tooltip } from "antd";
|
||||||
import {
|
import {
|
||||||
Plus,
|
Plus,
|
||||||
Sparkles,
|
|
||||||
ArrowUp,
|
ArrowUp,
|
||||||
ArrowDown,
|
ArrowDown,
|
||||||
Pause,
|
Pause,
|
||||||
Play,
|
Play,
|
||||||
DownloadIcon,
|
|
||||||
CheckCircle,
|
CheckCircle,
|
||||||
Check,
|
Sparkles,
|
||||||
StopCircle,
|
|
||||||
} from "lucide-react";
|
} from "lucide-react";
|
||||||
import type { SynthesisTask } from "@/pages/SynthesisTask/synthesis";
|
import { DeleteOutlined, EyeOutlined } from "@ant-design/icons";
|
||||||
import { mockSynthesisTasks } from "@/mock/synthesis";
|
|
||||||
import { Link, useNavigate } from "react-router";
|
import { Link, useNavigate } from "react-router";
|
||||||
import { SearchControls } from "@/components/SearchControls";
|
import { SearchControls } from "@/components/SearchControls";
|
||||||
import { formatDateTime } from "@/utils/unit";
|
import { formatDateTime } from "@/utils/unit";
|
||||||
|
import {
|
||||||
|
querySynthesisTasksUsingGet,
|
||||||
|
deleteSynthesisTaskByIdUsingDelete,
|
||||||
|
} from "@/pages/SynthesisTask/synthesis-api";
|
||||||
|
|
||||||
|
interface SynthesisTask {
|
||||||
|
id: string;
|
||||||
|
name: string;
|
||||||
|
description?: string;
|
||||||
|
status: string;
|
||||||
|
synthesis_type: string;
|
||||||
|
model_id: string;
|
||||||
|
progress?: number;
|
||||||
|
result_data_location?: string;
|
||||||
|
text_split_config?: {
|
||||||
|
chunk_size: number;
|
||||||
|
chunk_overlap: number;
|
||||||
|
};
|
||||||
|
synthesis_config?: {
|
||||||
|
temperature?: number | null;
|
||||||
|
prompt_template?: string;
|
||||||
|
synthesis_count?: number | null;
|
||||||
|
};
|
||||||
|
source_file_id?: string[];
|
||||||
|
total_files?: number;
|
||||||
|
processed_files?: number;
|
||||||
|
total_chunks?: number;
|
||||||
|
processed_chunks?: number;
|
||||||
|
total_synthesis_data?: number;
|
||||||
|
created_at: string;
|
||||||
|
updated_at?: string;
|
||||||
|
created_by?: string;
|
||||||
|
updated_by?: string;
|
||||||
|
}
|
||||||
|
|
||||||
export default function SynthesisTaskTab() {
|
export default function SynthesisTaskTab() {
|
||||||
const navigate = useNavigate();
|
const navigate = useNavigate();
|
||||||
const [searchQuery, setSearchQuery] = useState("");
|
const [searchQuery, setSearchQuery] = useState("");
|
||||||
const [tasks, setTasks] = useState<SynthesisTask[]>(mockSynthesisTasks);
|
const [tasks, setTasks] = useState<SynthesisTask[]>([]);
|
||||||
const [filterStatus, setFilterStatus] = useState("all");
|
const [filterStatus, setFilterStatus] = useState("all");
|
||||||
const [sortBy, setSortBy] = useState<"createdAt" | "name">("createdAt");
|
const [sortBy, setSortBy] = useState<"createdAt" | "name">("createdAt");
|
||||||
const [sortOrder, setSortOrder] = useState<"asc" | "desc">("desc");
|
const [sortOrder, setSortOrder] = useState<"asc" | "desc">("desc");
|
||||||
|
const [page, setPage] = useState(1);
|
||||||
|
const [pageSize, setPageSize] = useState(10);
|
||||||
|
const [total, setTotal] = useState(0);
|
||||||
|
const [loading, setLoading] = useState(false);
|
||||||
|
|
||||||
// 过滤任务
|
// 获取任务列表
|
||||||
const filteredTasks = tasks.filter((task) => {
|
const loadTasks = async () => {
|
||||||
const matchesSearch =
|
setLoading(true);
|
||||||
task.name.toLowerCase().includes(searchQuery.toLowerCase()) ||
|
try {
|
||||||
task.template.toLowerCase().includes(searchQuery.toLowerCase());
|
const params = {
|
||||||
const matchesStatus =
|
page: page,
|
||||||
filterStatus === "all" || task.status === filterStatus;
|
page_size: pageSize,
|
||||||
return matchesSearch && matchesStatus;
|
} as {
|
||||||
});
|
page?: number;
|
||||||
|
page_size?: number;
|
||||||
|
synthesis_type?: string;
|
||||||
|
status?: string;
|
||||||
|
name?: string;
|
||||||
|
};
|
||||||
|
if (searchQuery) params.name = searchQuery;
|
||||||
|
if (filterStatus !== "all") params.synthesis_type = filterStatus;
|
||||||
|
const res = await querySynthesisTasksUsingGet(params);
|
||||||
|
setTasks(res?.data?.content || []);
|
||||||
|
setTotal(res?.data?.totalElements || 0);
|
||||||
|
} catch {
|
||||||
|
setTasks([]);
|
||||||
|
setTotal(0);
|
||||||
|
} finally {
|
||||||
|
setLoading(false);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
loadTasks();
|
||||||
|
// eslint-disable-next-line
|
||||||
|
}, [searchQuery, filterStatus, page, pageSize]);
|
||||||
|
|
||||||
// 排序任务
|
|
||||||
const sortedTasks = [...filteredTasks].sort((a, b) => {
|
|
||||||
if (sortBy === "createdAt") {
|
|
||||||
const dateA = new Date(a.createdAt).getTime();
|
|
||||||
const dateB = new Date(b.createdAt).getTime();
|
|
||||||
return sortOrder === "asc" ? dateA - dateB : dateB - dateA;
|
|
||||||
} else if (sortBy === "name") {
|
|
||||||
return sortOrder === "asc"
|
|
||||||
? a.name.localeCompare(b.name)
|
|
||||||
: b.name.localeCompare(a.name);
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
});
|
|
||||||
const handleTaskAction = (taskId: number, action: string) => {
|
|
||||||
setTasks((prev) =>
|
|
||||||
prev.map((task) => {
|
|
||||||
if (task.id === taskId) {
|
|
||||||
switch (action) {
|
|
||||||
case "pause":
|
|
||||||
return { ...task, status: "paused" as const };
|
|
||||||
case "resume":
|
|
||||||
return { ...task, status: "running" as const };
|
|
||||||
case "stop":
|
|
||||||
return {
|
|
||||||
...task,
|
|
||||||
status: "failed" as const,
|
|
||||||
progress: task.progress,
|
|
||||||
};
|
|
||||||
default:
|
|
||||||
return task;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return task;
|
|
||||||
})
|
|
||||||
);
|
|
||||||
};
|
|
||||||
// 状态徽章
|
// 状态徽章
|
||||||
const getStatusBadge = (status: string) => {
|
const getStatusBadge = (status: string) => {
|
||||||
const statusConfig = {
|
const statusConfig: Record<string, { label: string; color: string; icon: ElementType }> = {
|
||||||
pending: {
|
pending: { label: "等待中", color: "#F59E0B", icon: Pause },
|
||||||
label: "等待中",
|
running: { label: "运行中", color: "#3B82F6", icon: Play },
|
||||||
color: "#F59E0B",
|
completed: { label: "已完成", color: "#10B981", icon: CheckCircle },
|
||||||
icon: Pause,
|
failed: { label: "失败", color: "#EF4444", icon: Pause },
|
||||||
},
|
paused: { label: "已暂停", color: "#E5E7EB", icon: Pause },
|
||||||
running: {
|
|
||||||
label: "运行中",
|
|
||||||
color: "#3B82F6",
|
|
||||||
icon: Play,
|
|
||||||
},
|
|
||||||
completed: {
|
|
||||||
label: "已完成",
|
|
||||||
color: "#10B981",
|
|
||||||
icon: CheckCircle,
|
|
||||||
},
|
|
||||||
failed: {
|
|
||||||
label: "失败",
|
|
||||||
color: "#EF4444",
|
|
||||||
icon: Pause,
|
|
||||||
},
|
|
||||||
paused: {
|
|
||||||
label: "已暂停",
|
|
||||||
color: "#E5E7EB",
|
|
||||||
icon: Pause,
|
|
||||||
},
|
|
||||||
};
|
};
|
||||||
return (
|
return statusConfig[status] ?? statusConfig["pending"];
|
||||||
statusConfig[status as keyof typeof statusConfig] || statusConfig.pending
|
|
||||||
);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// 任务表格列
|
// 类型映射
|
||||||
|
const typeMap: Record<string, string> = {
|
||||||
|
QA: "问答对生成",
|
||||||
|
COT: "链式推理生成",
|
||||||
|
};
|
||||||
|
|
||||||
|
// 表格列
|
||||||
const taskColumns = [
|
const taskColumns = [
|
||||||
{
|
{
|
||||||
title: (
|
title: (
|
||||||
@@ -134,98 +138,77 @@ export default function SynthesisTaskTab() {
|
|||||||
dataIndex: "name",
|
dataIndex: "name",
|
||||||
key: "name",
|
key: "name",
|
||||||
fixed: "left" as const,
|
fixed: "left" as const,
|
||||||
render: (text: string, task: SynthesisTask) => (
|
render: (_: unknown, task: SynthesisTask) => (
|
||||||
<div className="flex items-center gap-3">
|
<div className="flex items-center gap-3">
|
||||||
<div className="w-8 h-8 bg-blue-500 rounded-lg flex items-center justify-center shadow-sm">
|
<div className="w-8 h-8 bg-blue-500 rounded-lg flex items-center justify-center shadow-sm">
|
||||||
{/* 可根据 type 渲染不同图标 */}
|
|
||||||
<span className="text-white font-bold text-base">
|
<span className="text-white font-bold text-base">
|
||||||
{task.type?.toUpperCase()?.slice(0, 1) || "T"}
|
{task.synthesis_type?.toUpperCase()?.slice(0, 1) || "T"}
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<Link to={`/data/synthesis/task/${task.id}`}>{task.name}</Link>
|
<Link to={`/data/synthesis/task/${task.id}`}>{task.name}</Link>
|
||||||
<div className="text-xs text-gray-500">{task.template}</div>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
title: "类型",
|
title: "类型",
|
||||||
dataIndex: "type",
|
dataIndex: "synthesis_type",
|
||||||
key: "type",
|
key: "synthesis_type",
|
||||||
render: (type: string) => type.toUpperCase(),
|
render: (type: string) => typeMap[type] || type,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
title: "状态",
|
title: "文件数",
|
||||||
dataIndex: "status",
|
dataIndex: "total_files",
|
||||||
key: "status",
|
key: "total_files",
|
||||||
render: (status: string) => {
|
render: (num: number, task: SynthesisTask) => <span>{num ?? (task.source_file_id?.length ?? 0)}</span>,
|
||||||
const statusConfig = getStatusBadge(status);
|
|
||||||
return <Badge color={statusConfig.color} text={statusConfig.label} />;
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
title: "进度",
|
|
||||||
dataIndex: "progress",
|
|
||||||
key: "progress",
|
|
||||||
width: 150,
|
|
||||||
render: (_: any, task: SynthesisTask) => (
|
|
||||||
<Progress percent={task.progress} size="small" />
|
|
||||||
),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
title: "源数据集",
|
|
||||||
dataIndex: "sourceDataset",
|
|
||||||
key: "sourceDataset",
|
|
||||||
render: (text: string) => (
|
|
||||||
<div className="text-sm text-gray-900">{text}</div>
|
|
||||||
),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
title: "生成数量",
|
|
||||||
dataIndex: "generatedCount",
|
|
||||||
key: "generatedCount",
|
|
||||||
render: (_: any, task: SynthesisTask) => (
|
|
||||||
<div className="text-sm font-medium text-gray-900">
|
|
||||||
{task.generatedCount?.toLocaleString?.()} /{" "}
|
|
||||||
{task.targetCount?.toLocaleString?.()}
|
|
||||||
</div>
|
|
||||||
),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
title: "质量评分",
|
|
||||||
dataIndex: "quality",
|
|
||||||
key: "quality",
|
|
||||||
render: (quality: number) => (quality ? `${quality}%` : "-"),
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
title: "创建时间",
|
title: "创建时间",
|
||||||
dataIndex: "createdAt",
|
dataIndex: "created_at",
|
||||||
key: "createdAt",
|
key: "created_at",
|
||||||
render: formatDateTime,
|
render: (val: string) => formatDateTime(val),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
title: "操作",
|
title: "操作",
|
||||||
key: "actions",
|
key: "actions",
|
||||||
fixed: "right" as const,
|
fixed: "right" as const,
|
||||||
render: (_: any, task: SynthesisTask) => (
|
render: (_: unknown, task: SynthesisTask) => (
|
||||||
<div className="flex items-center justify-center gap-1">
|
<div className="flex items-center justify-center gap-1">
|
||||||
{task.status === "running" && (
|
<Tooltip title="查看详情">
|
||||||
<Button
|
<Button
|
||||||
onClick={() => handleTaskAction(task.id, "pause")}
|
onClick={() => navigate(`/data/synthesis/task/${task.id}`)}
|
||||||
className="hover:bg-orange-50 p-1 h-7 w-7"
|
className="hover:bg-blue-50 p-1 h-7 w-7"
|
||||||
type="text"
|
type="text"
|
||||||
icon={<Pause className="w-4 h-4" />}
|
icon={<EyeOutlined />}
|
||||||
></Button>
|
/>
|
||||||
)}
|
</Tooltip>
|
||||||
{task.status === "paused" && (
|
<Tooltip title="删除任务">
|
||||||
<Button
|
<Button
|
||||||
onClick={() => handleTaskAction(task.id, "resume")}
|
danger
|
||||||
className="hover:bg-green-50 p-1 h-7 w-7"
|
|
||||||
type="text"
|
type="text"
|
||||||
icon={<Play className="w-4 h-4" />}
|
className="hover:bg-red-50 p-1 h-7 w-7"
|
||||||
></Button>
|
icon={<DeleteOutlined />}
|
||||||
)}
|
onClick={() => {
|
||||||
|
Modal.confirm({
|
||||||
|
title: `确认删除任务?`,
|
||||||
|
content: `任务名:${task.name}`,
|
||||||
|
okText: "删除",
|
||||||
|
okType: "danger",
|
||||||
|
cancelText: "取消",
|
||||||
|
onOk: async () => {
|
||||||
|
try {
|
||||||
|
await deleteSynthesisTaskByIdUsingDelete(task.id);
|
||||||
|
message.success("删除成功");
|
||||||
|
loadTasks();
|
||||||
|
} catch {
|
||||||
|
message.error("删除失败");
|
||||||
|
}
|
||||||
|
},
|
||||||
|
});
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
</Tooltip>
|
||||||
</div>
|
</div>
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
@@ -237,18 +220,15 @@ export default function SynthesisTaskTab() {
|
|||||||
<SearchControls
|
<SearchControls
|
||||||
searchTerm={searchQuery}
|
searchTerm={searchQuery}
|
||||||
onSearchChange={setSearchQuery}
|
onSearchChange={setSearchQuery}
|
||||||
searchPlaceholder="搜索任务名称或模板..."
|
searchPlaceholder="搜索任务名称..."
|
||||||
filters={[
|
filters={[
|
||||||
{
|
{
|
||||||
key: "status",
|
key: "status",
|
||||||
label: "状态",
|
label: "类型",
|
||||||
options: [
|
options: [
|
||||||
{ label: "全部状态", value: "all" },
|
{ label: "全部类型", value: "all" },
|
||||||
{ label: "等待中", value: "pending" },
|
{ label: "问答对生成", value: "QA" },
|
||||||
{ label: "运行中", value: "running" },
|
{ label: "链式推理生成", value: "COT" },
|
||||||
{ label: "已完成", value: "completed" },
|
|
||||||
{ label: "失败", value: "failed" },
|
|
||||||
{ label: "已暂停", value: "paused" },
|
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
]}
|
]}
|
||||||
@@ -259,13 +239,23 @@ export default function SynthesisTaskTab() {
|
|||||||
showFilters
|
showFilters
|
||||||
showViewToggle={false}
|
showViewToggle={false}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
{/* 任务表格 */}
|
{/* 任务表格 */}
|
||||||
<Card>
|
<Card>
|
||||||
<Table
|
<Table
|
||||||
columns={taskColumns}
|
columns={taskColumns}
|
||||||
dataSource={sortedTasks}
|
dataSource={tasks}
|
||||||
rowKey="id"
|
rowKey="id"
|
||||||
|
loading={loading}
|
||||||
|
pagination={{
|
||||||
|
current: page,
|
||||||
|
pageSize: pageSize,
|
||||||
|
total: total,
|
||||||
|
onChange: (p, ps) => {
|
||||||
|
setPage(p);
|
||||||
|
setPageSize(ps);
|
||||||
|
},
|
||||||
|
showSizeChanger: true,
|
||||||
|
}}
|
||||||
scroll={{ x: "max-content" }}
|
scroll={{ x: "max-content" }}
|
||||||
locale={{
|
locale={{
|
||||||
emptyText: (
|
emptyText: (
|
||||||
|
|||||||
37
frontend/src/pages/SynthesisTask/synthesis-api.ts
Normal file
37
frontend/src/pages/SynthesisTask/synthesis-api.ts
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
import { get, post, del } from "@/utils/request";
|
||||||
|
|
||||||
|
// 创建数据合成任务
|
||||||
|
export function createSynthesisTaskUsingPost(data: unknown) {
|
||||||
|
return post("/api/synthesis/gen/task", data);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 获取数据合成任务详情
|
||||||
|
export function querySynthesisTaskByIdUsingGet(taskId: string) {
|
||||||
|
return get(`/api/synthesis/gen/task/${taskId}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 分页查询数据合成任务列表
|
||||||
|
export function querySynthesisTasksUsingGet(params: {
|
||||||
|
page?: number;
|
||||||
|
page_size?: number;
|
||||||
|
synthesis_type?: string;
|
||||||
|
status?: string;
|
||||||
|
name?: string;
|
||||||
|
}) {
|
||||||
|
return get(`/api/synthesis/gen/tasks`, params as any);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 删除整个数据合成任务
|
||||||
|
export function deleteSynthesisTaskByIdUsingDelete(taskId: string) {
|
||||||
|
return del(`/api/synthesis/gen/task/${taskId}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 分页查询某个任务下的文件任务列表
|
||||||
|
export function querySynthesisFileTasksUsingGet(taskId: string, params: { page?: number; page_size?: number }) {
|
||||||
|
return get(`/api/synthesis/gen/task/${taskId}/files`, params as any);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 获取不同合成类型对应的 Prompt
|
||||||
|
export function getPromptByTypeUsingGet(synthType: string) {
|
||||||
|
return get(`/api/synthesis/gen/prompt`, { synth_type: synthType } as any);
|
||||||
|
}
|
||||||
@@ -40,6 +40,7 @@ import { withErrorBoundary } from "@/components/ErrorBoundary";
|
|||||||
import AgentPage from "@/pages/Agent/Agent.tsx";
|
import AgentPage from "@/pages/Agent/Agent.tsx";
|
||||||
import RatioTaskDetail from "@/pages/RatioTask/Detail/RatioTaskDetail";
|
import RatioTaskDetail from "@/pages/RatioTask/Detail/RatioTaskDetail";
|
||||||
import CleansingTemplateDetail from "@/pages/DataCleansing/Detail/TemplateDetail";
|
import CleansingTemplateDetail from "@/pages/DataCleansing/Detail/TemplateDetail";
|
||||||
|
import SynthFileTask from "@/pages/SynthesisTask/SynthFileTask.tsx";
|
||||||
import EvaluationDetailPage from "@/pages/DataEvaluation/Detail/TaskDetail.tsx";
|
import EvaluationDetailPage from "@/pages/DataEvaluation/Detail/TaskDetail.tsx";
|
||||||
|
|
||||||
const router = createBrowserRouter([
|
const router = createBrowserRouter([
|
||||||
@@ -160,6 +161,7 @@ const router = createBrowserRouter([
|
|||||||
path: "create",
|
path: "create",
|
||||||
Component: SynthesisTaskCreate,
|
Component: SynthesisTaskCreate,
|
||||||
},
|
},
|
||||||
|
{path: ":id", Component: SynthFileTask},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
from fastapi import APIRouter
|
from fastapi import APIRouter
|
||||||
|
|
||||||
router = APIRouter(
|
router = APIRouter(
|
||||||
prefix="/synth",
|
prefix="/synthesis",
|
||||||
tags = ["synth"]
|
tags = ["synthesis"]
|
||||||
)
|
)
|
||||||
|
|
||||||
# Include sub-routers
|
# Include sub-routers
|
||||||
|
|||||||
@@ -18,7 +18,14 @@ from app.db.session import get_db
|
|||||||
from app.module.generation.schema.generation import (
|
from app.module.generation.schema.generation import (
|
||||||
CreateSynthesisTaskRequest,
|
CreateSynthesisTaskRequest,
|
||||||
DataSynthesisTaskItem,
|
DataSynthesisTaskItem,
|
||||||
PagedDataSynthesisTaskResponse, SynthesisType)
|
PagedDataSynthesisTaskResponse,
|
||||||
|
SynthesisType,
|
||||||
|
DataSynthesisFileTaskItem,
|
||||||
|
PagedDataSynthesisFileTaskResponse,
|
||||||
|
DataSynthesisChunkItem,
|
||||||
|
PagedDataSynthesisChunkResponse,
|
||||||
|
SynthesisDataItem,
|
||||||
|
)
|
||||||
from app.module.generation.service.generation_service import GenerationService
|
from app.module.generation.service.generation_service import GenerationService
|
||||||
from app.module.generation.service.prompt import get_prompt
|
from app.module.generation.service.prompt import get_prompt
|
||||||
from app.module.shared.schema import StandardResponse
|
from app.module.shared.schema import StandardResponse
|
||||||
@@ -219,19 +226,26 @@ async def delete_synthesis_task(
|
|||||||
data=None,
|
data=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@router.delete("/task/{task_id}/{file_id}", response_model=StandardResponse[None])
|
@router.delete("/task/{task_id}/{file_id}", response_model=StandardResponse[None])
|
||||||
async def delete_synthesis_file_task(
|
async def delete_synthesis_file_task(
|
||||||
task_id: str,
|
task_id: str,
|
||||||
file_id: str,
|
file_id: str,
|
||||||
db: AsyncSession = Depends(get_db)
|
db: AsyncSession = Depends(get_db)
|
||||||
):
|
):
|
||||||
"""删除数据合成任务中的文件任务"""
|
"""删除数据合成任务中的文件任务,同时刷新任务表中的文件/切片数量"""
|
||||||
|
# 先获取任务和文件任务记录
|
||||||
|
task = await db.get(DataSynthesisInstance, task_id)
|
||||||
|
if not task:
|
||||||
|
raise HTTPException(status_code=404, detail="Synthesis task not found")
|
||||||
|
|
||||||
file_task = await db.get(DataSynthesisFileInstance, file_id)
|
file_task = await db.get(DataSynthesisFileInstance, file_id)
|
||||||
if not file_task:
|
if not file_task:
|
||||||
raise HTTPException(status_code=404, detail="Synthesis file task not found")
|
raise HTTPException(status_code=404, detail="Synthesis file task not found")
|
||||||
|
|
||||||
# 删除 SynthesisData(根据文件任务ID)
|
# 删除 SynthesisData(根据文件任务ID)
|
||||||
await db.execute(delete(SynthesisData).where(
|
await db.execute(
|
||||||
|
delete(SynthesisData).where(
|
||||||
SynthesisData.synthesis_file_instance_id == file_id
|
SynthesisData.synthesis_file_instance_id == file_id
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -243,11 +257,28 @@ async def delete_synthesis_file_task(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# 删除文件任务记录
|
# 删除文件任务记录
|
||||||
await db.execute(delete(DataSynthesisFileInstance).where(
|
await db.execute(
|
||||||
|
delete(DataSynthesisFileInstance).where(
|
||||||
DataSynthesisFileInstance.id == file_id
|
DataSynthesisFileInstance.id == file_id
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# 刷新任务级别统计字段:总文件数、总文本块数、已处理文本块数
|
||||||
|
if task.total_files and task.total_files > 0:
|
||||||
|
task.total_files -= 1
|
||||||
|
if task.total_files < 0:
|
||||||
|
task.total_files = 0
|
||||||
|
|
||||||
|
await db.commit()
|
||||||
|
await db.refresh(task)
|
||||||
|
|
||||||
|
return StandardResponse(
|
||||||
|
code=200,
|
||||||
|
message="success",
|
||||||
|
data=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@router.get("/prompt", response_model=StandardResponse[str])
|
@router.get("/prompt", response_model=StandardResponse[str])
|
||||||
async def get_prompt_by_type(
|
async def get_prompt_by_type(
|
||||||
synth_type: SynthesisType,
|
synth_type: SynthesisType,
|
||||||
@@ -258,3 +289,157 @@ async def get_prompt_by_type(
|
|||||||
message="Success",
|
message="Success",
|
||||||
data=prompt,
|
data=prompt,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/task/{task_id}/files", response_model=StandardResponse[PagedDataSynthesisFileTaskResponse])
async def list_synthesis_file_tasks(
    task_id: str,
    page: int = 1,
    page_size: int = 10,
    db: AsyncSession = Depends(get_db),
):
    """Return one page of the file tasks that belong to a data synthesis task.

    Args:
        task_id: ID of the parent synthesis task.
        page: 1-based page number; values below 1 are treated as 1.
        page_size: Rows per page; values below 1 fall back to 10.
        db: Async database session injected through ``Depends(get_db)``.

    Returns:
        A ``StandardResponse`` (code 200) whose ``data`` is a
        ``PagedDataSynthesisFileTaskResponse``.

    Raises:
        HTTPException: 404 when no synthesis task has the given ID.
    """
    # Validate the parent task first so an unknown ID yields 404, not an empty page.
    task = await db.get(DataSynthesisInstance, task_id)
    if not task:
        raise HTTPException(status_code=404, detail="Synthesis task not found")

    base_query = select(DataSynthesisFileInstance).where(
        DataSynthesisFileInstance.synthesis_instance_id == task_id
    )

    # Total row count for the same filter, computed over a subquery.
    count_q = select(func.count()).select_from(base_query.subquery())
    total = (await db.execute(count_q)).scalar_one()

    # Replace out-of-range paging arguments with safe defaults.
    if page < 1:
        page = 1
    if page_size < 1:
        page_size = 10

    # OFFSET/LIMIT without ORDER BY gives no guaranteed row order, so successive
    # pages could overlap or skip rows. Sort on the primary key to keep paging stable.
    result = await db.execute(
        base_query.order_by(DataSynthesisFileInstance.id.asc())
        .offset((page - 1) * page_size)
        .limit(page_size)
    )
    rows = result.scalars().all()

    file_items = [
        DataSynthesisFileTaskItem(
            id=row.id,
            synthesis_instance_id=row.synthesis_instance_id,
            file_name=row.file_name,
            source_file_id=row.source_file_id,
            target_file_location=row.target_file_location,
            status=row.status,
            total_chunks=row.total_chunks,
            processed_chunks=row.processed_chunks,
            created_at=row.created_at,
            updated_at=row.updated_at,
            created_by=row.created_by,
            updated_by=row.updated_by,
        )
        for row in rows
    ]

    paged = PagedDataSynthesisFileTaskResponse(
        content=file_items,
        totalElements=total,
        # Ceiling division: a partially filled last page still counts as a page.
        totalPages=(total + page_size - 1) // page_size,
        page=page,
        size=page_size,
    )

    return StandardResponse(
        code=200,
        message="Success",
        data=paged,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/file/{file_id}/chunks", response_model=StandardResponse[PagedDataSynthesisChunkResponse])
async def list_chunks_by_file(
    file_id: str,
    page: int = 1,
    page_size: int = 10,
    db: AsyncSession = Depends(get_db),
):
    """Page through the chunk records of one synthesis file task, ordered by chunk index.

    Args:
        file_id: ID of the synthesis file task whose chunks are listed.
        page: 1-based page number; values below 1 are treated as 1.
        page_size: Rows per page; values below 1 fall back to 10.
        db: Async database session injected through ``Depends(get_db)``.

    Returns:
        A ``StandardResponse`` (code 200) whose ``data`` is a
        ``PagedDataSynthesisChunkResponse``.

    Raises:
        HTTPException: 404 when no file task has the given ID.
    """
    # An unknown file task is reported as 404 instead of an empty page.
    owner = await db.get(DataSynthesisFileInstance, file_id)
    if not owner:
        raise HTTPException(status_code=404, detail="Synthesis file task not found")

    chunks_of_file = select(DataSynthesisChunkInstance).where(
        DataSynthesisChunkInstance.synthesis_file_instance_id == file_id
    )

    # Count all matching rows through a subquery over the same filter.
    total_stmt = select(func.count()).select_from(chunks_of_file.subquery())
    total = (await db.execute(total_stmt)).scalar_one()

    # Fall back to defaults for out-of-range paging arguments.
    page = max(page, 1)
    if page_size < 1:
        page_size = 10

    page_stmt = (
        chunks_of_file.order_by(DataSynthesisChunkInstance.chunk_index.asc())
        .offset((page - 1) * page_size)
        .limit(page_size)
    )
    records = (await db.execute(page_stmt)).scalars().all()

    chunk_items = []
    for record in records:
        chunk_items.append(
            DataSynthesisChunkItem(
                id=record.id,
                synthesis_file_instance_id=record.synthesis_file_instance_id,
                chunk_index=record.chunk_index,
                chunk_content=record.chunk_content,
                # Read defensively: yields None if the row has no such attribute.
                chunk_metadata=getattr(record, "chunk_metadata", None),
            )
        )

    return StandardResponse(
        code=200,
        message="Success",
        data=PagedDataSynthesisChunkResponse(
            content=chunk_items,
            totalElements=total,
            # Ceiling division so a partial last page is counted.
            totalPages=(total + page_size - 1) // page_size,
            page=page,
            size=page_size,
        ),
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/chunk/{chunk_id}/data", response_model=StandardResponse[list[SynthesisDataItem]])
async def list_synthesis_data_by_chunk(
    chunk_id: str,
    db: AsyncSession = Depends(get_db),
):
    """Return every synthesis result row attached to one chunk.

    Args:
        chunk_id: ID of the chunk whose synthesis data is requested.
        db: Async database session injected through ``Depends(get_db)``.

    Returns:
        A ``StandardResponse`` (code 200) whose ``data`` is a list of
        ``SynthesisDataItem``; the list is not paginated.

    Raises:
        HTTPException: 404 when no chunk has the given ID.
    """
    # Check that the chunk exists so an unknown ID yields 404 rather than [].
    if not await db.get(DataSynthesisChunkInstance, chunk_id):
        raise HTTPException(status_code=404, detail="Chunk not found")

    stmt = select(SynthesisData).where(SynthesisData.chunk_instance_id == chunk_id)
    records = (await db.execute(stmt)).scalars().all()

    items = []
    for record in records:
        items.append(
            SynthesisDataItem(
                id=record.id,
                data=record.data,
                synthesis_file_instance_id=record.synthesis_file_instance_id,
                chunk_instance_id=record.chunk_instance_id,
            )
        )

    return StandardResponse(code=200, message="Success", data=items)
|
||||||
|
|||||||
@@ -70,6 +70,67 @@ class PagedDataSynthesisTaskResponse(BaseModel):
|
|||||||
page: int
|
page: int
|
||||||
size: int
|
size: int
|
||||||
|
|
||||||
|
|
||||||
|
class DataSynthesisFileTaskItem(BaseModel):
    """One file task belonging to a data synthesis task."""

    # Identity and linkage to the parent synthesis task.
    id: str
    synthesis_instance_id: str

    # Source file and output location.
    file_name: str
    source_file_id: str
    target_file_location: str

    # Processing state and chunk counters.
    status: Optional[str] = None
    total_chunks: int
    processed_chunks: int

    # Audit fields; all optional.
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    created_by: Optional[str] = None
    updated_by: Optional[str] = None

    class Config:
        # Allow the model to be populated from attribute-bearing objects such as ORM rows.
        orm_mode = True
|
||||||
|
|
||||||
|
|
||||||
|
class PagedDataSynthesisFileTaskResponse(BaseModel):
    """Paged response carrying the file tasks of a data synthesis task."""

    # Items on the current page.
    content: List[DataSynthesisFileTaskItem]
    # Total number of matching rows across all pages.
    totalElements: int
    # Number of pages at the current page size.
    totalPages: int
    # Page number this response corresponds to.
    page: int
    # Page size used for this response.
    size: int
|
||||||
|
|
||||||
|
|
||||||
|
class DataSynthesisChunkItem(BaseModel):
    """A chunk record that belongs to a synthesis file task."""

    id: str
    # ID of the file task this chunk belongs to.
    synthesis_file_instance_id: str

    # Chunk position, text, and free-form metadata; all optional.
    chunk_index: Optional[int] = None
    chunk_content: Optional[str] = None
    chunk_metadata: Optional[Dict[str, Any]] = None

    class Config:
        # Allow the model to be populated from attribute-bearing objects such as ORM rows.
        orm_mode = True
|
||||||
|
|
||||||
|
|
||||||
|
class PagedDataSynthesisChunkResponse(BaseModel):
    """Paged response carrying a list of chunks."""

    # Items on the current page.
    content: List[DataSynthesisChunkItem]
    # Total number of matching rows across all pages.
    totalElements: int
    # Number of pages at the current page size.
    totalPages: int
    # Page number this response corresponds to.
    page: int
    # Page size used for this response.
    size: int
|
||||||
|
|
||||||
|
|
||||||
|
class SynthesisDataItem(BaseModel):
    """A single synthesis result record."""

    id: str
    # Synthesized payload as a free-form mapping; may be absent.
    data: Optional[Dict[str, Any]] = None
    # IDs of the file task and chunk this result is attached to.
    synthesis_file_instance_id: str
    chunk_instance_id: str

    class Config:
        # Allow the model to be populated from attribute-bearing objects such as ORM rows.
        orm_mode = True
|
||||||
|
|
||||||
|
|
||||||
class ChatRequest(BaseModel):
|
class ChatRequest(BaseModel):
|
||||||
"""聊天请求参数"""
|
"""聊天请求参数"""
|
||||||
model_id: str
|
model_id: str
|
||||||
|
|||||||
@@ -168,11 +168,11 @@ class GenerationService:
|
|||||||
self.db.add(chunk_record)
|
self.db.add(chunk_record)
|
||||||
|
|
||||||
# 更新文件任务的分块数量
|
# 更新文件任务的分块数量
|
||||||
file_task.chunk_count = len(chunks)
|
file_task.total_chunks = len(chunks)
|
||||||
file_task.status = "processing"
|
file_task.status = "processing"
|
||||||
|
|
||||||
await self.db.refresh(file_task)
|
|
||||||
await self.db.commit()
|
await self.db.commit()
|
||||||
|
await self.db.refresh(file_task)
|
||||||
|
|
||||||
async def _invoke_llm_for_chunks(
|
async def _invoke_llm_for_chunks(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
from app.module.generation.schema.generation import SynthesisType
|
from app.module.generation.schema.generation import SynthesisType
|
||||||
|
|
||||||
QA_PROMPT="""
|
QA_PROMPT="""# 角色
|
||||||
# 角色
|
|
||||||
你是一位专业的AI助手,擅长从给定的文本中提取关键信息并创建用于教学和测试的问答对。
|
你是一位专业的AI助手,擅长从给定的文本中提取关键信息并创建用于教学和测试的问答对。
|
||||||
|
|
||||||
# 任务
|
# 任务
|
||||||
@@ -11,7 +10,7 @@ QA_PROMPT="""
|
|||||||
{document}
|
{document}
|
||||||
|
|
||||||
# 要求与指令
|
# 要求与指令
|
||||||
1. **问题类型**:生成{synthesis_count - 1}-{synthesis_count + 1}个问答对。问题类型应多样化,包括但不限于:
|
1. **问题类型**:生成 {synthesis_count} 个左右的问答对。问题类型应多样化,包括但不限于:
|
||||||
* **事实性**:基于文本中明确提到的事实。
|
* **事实性**:基于文本中明确提到的事实。
|
||||||
* **理解性**:需要理解上下文和概念。
|
* **理解性**:需要理解上下文和概念。
|
||||||
* **归纳性**:需要总结或归纳多个信息点。
|
* **归纳性**:需要总结或归纳多个信息点。
|
||||||
@@ -30,8 +29,7 @@ QA_PROMPT="""
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
COT_PROMPT="""
|
COT_PROMPT="""# 角色
|
||||||
# 角色
|
|
||||||
你是一位专业的数据合成专家,擅长基于给定的原始文档和 COT(Chain of Thought,思维链)逻辑,生成高质量、符合实际应用场景的 COT 数据。COT 数据需包含清晰的问题、逐步推理过程和最终结论,能完整还原解决问题的思考路径。
|
你是一位专业的数据合成专家,擅长基于给定的原始文档和 COT(Chain of Thought,思维链)逻辑,生成高质量、符合实际应用场景的 COT 数据。COT 数据需包含清晰的问题、逐步推理过程和最终结论,能完整还原解决问题的思考路径。
|
||||||
|
|
||||||
# 任务
|
# 任务
|
||||||
@@ -41,7 +39,7 @@ COT_PROMPT="""
|
|||||||
{document}
|
{document}
|
||||||
|
|
||||||
# 要求与指令
|
# 要求与指令
|
||||||
1. **数量要求**:生成 {min\_count}-{max\_count} 条 COT 数据(min\_count={synthesis\_count-1},max\_count={synthesis\_count+1})。
|
1. **数量要求**:生成 {synthesis_count} 条左右的 COT 数据。
|
||||||
2. **内容要求**:
|
2. **内容要求**:
|
||||||
* 每条 COT 数据需包含 “问题”“思维链推理”“最终结论” 三部分,逻辑闭环,推理步骤清晰、连贯,不跳跃关键环节。
|
* 每条 COT 数据需包含 “问题”“思维链推理”“最终结论” 三部分,逻辑闭环,推理步骤清晰、连贯,不跳跃关键环节。
|
||||||
* 问题需基于文档中的事实信息、概念关联或逻辑疑问,是读完文档后自然产生的有价值问题(避免无意义或过于简单的问题)。
|
* 问题需基于文档中的事实信息、概念关联或逻辑疑问,是读完文档后自然产生的有价值问题(避免无意义或过于简单的问题)。
|
||||||
|
|||||||
Reference in New Issue
Block a user