feat(data-management): 添加数据集相似度推荐功能

- 在DatasetApplicationService中实现getSimilarDatasets方法,支持基于标签匹配的相似数据集推荐
- 新增normalizeSimilarLimit、normalizeTagNames、countSharedTags等辅助方法用于相似度计算
- 在DatasetRepository接口及其实现类中添加findSimilarByTags方法,支持数据库层面的标签匹配查询
- 在DatasetController中暴露/similar REST API端点,支持按需获取相似数据集
- 在前端Overview组件中展示相似数据集表格,包含名称、标签、类型、文件数和更新时间等信息
- 在DatasetDetail页面集成相似数据集获取逻辑,限制默认返回数量为4条
- 移除KnowledgeItem中的冗余title字段,统一使用其他标识信息
- 优化知识管理相关组件中的标题显示逻辑,移除硬编码标题值
This commit is contained in:
2026-01-30 11:43:44 +08:00
parent c51cd2b6e4
commit c221666e67
12 changed files with 481 additions and 98 deletions

View File

@@ -1,4 +1,4 @@
import { useEffect, useMemo, useState } from "react";
import { useEffect, useMemo, useRef, useState } from "react";
import { Breadcrumb, App, Tabs, Table, Tag } from "antd";
import {
ReloadOutlined,
@@ -19,6 +19,7 @@ import {
queryDatasetByIdUsingGet,
queryDatasetsUsingGet,
queryDatasetTagsUsingGet,
querySimilarDatasetsUsingGet,
updateDatasetByIdUsingPut,
} from "../dataset.api";
import DataQuality from "./components/DataQuality";
@@ -26,8 +27,10 @@ import DataLineageFlow from "./components/DataLineageFlow";
import Overview from "./components/Overview";
import { Activity, Clock, File, FileType } from "lucide-react";
import EditDataset from "../Create/EditDataset";
import ImportConfiguration from "./components/ImportConfiguration";
import ImportConfiguration from "./components/ImportConfiguration";
// Value sent as the `limit` query parameter when requesting similar datasets for the detail page.
const SIMILAR_DATASET_LIMIT = 4;
export default function DatasetDetail() {
const { id } = useParams(); // 获取动态路由参数
const navigate = useNavigate();
@@ -39,9 +42,61 @@ export default function DatasetDetail() {
const [parentDataset, setParentDataset] = useState<Dataset | null>(null);
const [childDatasets, setChildDatasets] = useState<Dataset[]>([]);
const [childDatasetsLoading, setChildDatasetsLoading] = useState(false);
const [similarDatasets, setSimilarDatasets] = useState<Dataset[]>([]);
const [similarDatasetsLoading, setSimilarDatasetsLoading] = useState(false);
const [similarTagNames, setSimilarTagNames] = useState<string[]>([]);
const similarRequestRef = useRef(0);
const filesOperation = useFilesOperation(dataset);
const [showUploadDialog, setShowUploadDialog] = useState(false);
/**
 * Reduces a mixed list of tags (plain strings or objects carrying a `name`)
 * to a list of trimmed, non-empty, de-duplicated tag names.
 * First-seen order is preserved; null entries and blank names are dropped.
 *
 * @param tags Tags as plain strings, `{ name }` objects, or null entries.
 * @returns Unique trimmed tag names; an empty array when nothing usable is given.
 */
const normalizeTagNames = (
  tags?: Array<string | { name?: string | null } | null>
) => {
  const unique = new Set<string>();
  for (const entry of tags ?? []) {
    const raw = typeof entry === "string" ? entry : entry?.name;
    const trimmed = raw ? raw.trim() : "";
    if (trimmed.length > 0) {
      unique.add(trimmed);
    }
  }
  return Array.from(unique);
};
/**
 * Loads the datasets similar to `currentDataset` and stores them in component state.
 *
 * Every call takes a new ticket from `similarRequestRef`; a response whose ticket
 * is no longer the latest one is discarded so an older request cannot overwrite
 * the result of a newer one. No request is sent when the dataset has no id or
 * no usable tag names.
 *
 * @param currentDataset Dataset whose similar datasets should be fetched.
 */
const fetchSimilarDatasets = async (currentDataset: Dataset) => {
  similarRequestRef.current += 1;
  const ticket = similarRequestRef.current;
  const isLatest = () => similarRequestRef.current === ticket;

  // Without an id there is nothing to query: reset everything.
  if (!currentDataset?.id) {
    setSimilarDatasets([]);
    setSimilarTagNames([]);
    setSimilarDatasetsLoading(false);
    return;
  }

  const tagNames = normalizeTagNames(
    currentDataset.tags as Array<string | { name?: string }>
  );
  setSimilarTagNames(tagNames);
  setSimilarDatasets([]);

  // The loading flag is only raised when a request will actually be issued.
  const hasTags = tagNames.length > 0;
  setSimilarDatasetsLoading(hasTags);
  if (!hasTags) {
    return;
  }

  try {
    const { data: payload } = await querySimilarDatasetsUsingGet(
      currentDataset.id,
      { limit: SIMILAR_DATASET_LIMIT }
    );
    if (!isLatest()) {
      return;
    }
    const rows = Array.isArray(payload) ? payload : [];
    setSimilarDatasets(rows.map((row) => mapDataset(row)));
  } catch (error) {
    console.error("Failed to fetch similar datasets:", error);
  } finally {
    // Only the latest request may clear the loading flag.
    if (isLatest()) {
      setSimilarDatasetsLoading(false);
    }
  }
};
const navigateItems = useMemo(() => {
const items = [
{
@@ -110,6 +165,7 @@ export default function DatasetDetail() {
const { data } = await queryDatasetByIdUsingGet(id);
const mapped = mapDataset(data);
setDataset(mapped);
fetchSimilarDatasets(mapped);
if (data?.parentDatasetId) {
const { data: parentData } = await queryDatasetByIdUsingGet(
data.parentDatasetId
@@ -351,6 +407,9 @@ export default function DatasetDetail() {
filesOperation={filesOperation}
fetchDataset={fetchDataset}
onUpload={() => setShowUploadDialog(true)}
similarDatasets={similarDatasets}
similarDatasetsLoading={similarDatasetsLoading}
similarTags={similarTagNames}
/>
)}
{activeTab === "children" && (

View File

@@ -1,8 +1,19 @@
import { App, Button, Descriptions, DescriptionsProps, Modal, Table, Input } from "antd";
import { formatBytes, formatDateTime } from "@/utils/unit";
import { Download, Trash2, Folder, File } from "lucide-react";
import {
App,
Button,
Descriptions,
DescriptionsProps,
Modal,
Table,
Input,
Tag,
} from "antd";
import { formatBytes, formatDateTime } from "@/utils/unit";
import { Download, Trash2, Folder, File } from "lucide-react";
import { datasetTypeMap } from "../../dataset.const";
import type { DatasetFile } from "@/pages/DataManagement/dataset.model";
import type { Dataset, DatasetFile } from "@/pages/DataManagement/dataset.model";
import { Link } from "react-router";
import type { useFilesOperation } from "../useFilesOperation";
type DatasetFileRow = DatasetFile & {
fileSize?: number;
@@ -18,14 +29,29 @@ const PREVIEW_MODAL_WIDTH = {
const PREVIEW_TEXT_FONT_SIZE = 12;
const PREVIEW_TEXT_PADDING = 12;
const PREVIEW_AUDIO_PADDING = 40;
// Number of the current dataset's tag names listed in the summary text next to the similar-datasets heading.
const SIMILAR_TAGS_PREVIEW_LIMIT = 3;
// Number of tags rendered per row in the similar-datasets table before the rest collapse into a "+N" tag.
const SIMILAR_DATASET_TAG_PREVIEW_LIMIT = 4;
/** Props accepted by the dataset `Overview` component. */
type OverviewProps = {
  /** Dataset record displayed by the overview. */
  dataset: Dataset;
  /** Return value of `useFilesOperation`; the component destructures the file list, pagination and file/directory handlers from it. */
  filesOperation: ReturnType<typeof useFilesOperation>;
  /** Callback supplied by the parent page — presumably reloads the dataset; confirm against the caller. */
  fetchDataset: () => void;
  /** Optional callback; the detail page passes one that opens its upload dialog. */
  onUpload?: () => void;
  /** Rows shown in the similar-datasets table. */
  similarDatasets: Dataset[];
  /** Loading flag forwarded to the similar-datasets table. */
  similarDatasetsLoading: boolean;
  /** Tag names of the current dataset; used for the summary text and to pick the table's empty-state message. */
  similarTags: string[];
};
export default function Overview({
dataset,
filesOperation,
fetchDataset,
onUpload,
}) {
const { modal, message } = App.useApp();
similarDatasets,
similarDatasetsLoading,
similarTags,
}: OverviewProps) {
const { modal, message } = App.useApp();
const {
fileList,
pagination,
@@ -46,6 +72,82 @@ export default function Overview({
handleDeleteDirectory,
handlePreviewFile,
} = filesOperation;
// Short text listing the tags used for similarity matching, e.g. "a、b、c +2".
// At most SIMILAR_TAGS_PREVIEW_LIMIT names are spelled out; the remainder is
// shown as a "+N" suffix. Empty string when there are no tags.
const similarTagsSummary = (() => {
  if (!similarTags || similarTags.length === 0) {
    return "";
  }
  const visibleTags = similarTags.slice(0, SIMILAR_TAGS_PREVIEW_LIMIT);
  const preview = visibleTags.join("、");
  const hiddenCount = similarTags.length - visibleTags.length;
  // Previously the total tag count was glued straight onto the last tag name
  // ("a、b、c5"). Show the number of hidden tags instead, clearly separated,
  // in the same "+N" form the tag column of the table uses.
  return hiddenCount > 0 ? `${preview} +${hiddenCount}` : preview;
})();
/**
 * Renders the tag cell of a similar-dataset row.
 *
 * Shows up to SIMILAR_DATASET_TAG_PREVIEW_LIMIT tags (entries without a name
 * are skipped) followed by a "+N" tag when more tags exist. Returns "-" when
 * the dataset has no tags at all.
 *
 * @param tags Tags as plain strings or `{ name, color }` objects.
 */
const renderDatasetTags = (
  tags?: Array<string | { name?: string; color?: string } | null>
) => {
  if (!tags?.length) {
    return "-";
  }
  const shown = tags.slice(0, SIMILAR_DATASET_TAG_PREVIEW_LIMIT);
  const overflow = tags.length - shown.length;
  const chips = shown.map((entry, position) => {
    const label = typeof entry === "string" ? entry : entry?.name;
    if (!label) {
      return null;
    }
    // Only object-shaped tags can carry a colour.
    const tint = typeof entry === "string" ? undefined : entry?.color;
    return (
      <Tag key={`${label}-${position}`} color={tint}>
        {label}
      </Tag>
    );
  });
  return (
    <div className="flex flex-wrap gap-1">
      {chips}
      {overflow > 0 && <Tag>+{overflow}</Tag>}
    </div>
  );
};
// Column definitions for the similar-datasets table:
// name (link to the dataset detail page), tags, type label, file count, update time.
const similarColumns = [
  {
    key: "name",
    dataIndex: "name",
    title: "名称",
    render: (_: string, record: Dataset) => {
      const detailPath = `/data/management/detail/${record.id}`;
      return <Link to={detailPath}>{record.name}</Link>;
    },
  },
  {
    key: "tags",
    dataIndex: "tags",
    title: "标签",
    render: (tags: Array<string | { name?: string; color?: string }>) => {
      return renderDatasetTags(tags);
    },
  },
  {
    key: "datasetType",
    dataIndex: "datasetType",
    title: "类型",
    width: 120,
    render: (_: string, record: Dataset) => {
      // Fall back to a generic label when the type is missing from the map.
      const typeKey = record.datasetType as keyof typeof datasetTypeMap;
      return datasetTypeMap[typeKey]?.label || "未知";
    },
  },
  {
    key: "fileCount",
    dataIndex: "fileCount",
    title: "文件数",
    width: 120,
    // A missing count is displayed as 0.
    render: (value?: number) => (value == null ? 0 : value),
  },
  {
    key: "updatedAt",
    dataIndex: "updatedAt",
    title: "更新时间",
    width: 180,
  },
];
// 基本信息
const items: DescriptionsProps["items"] = [
@@ -265,7 +367,32 @@ export default function Overview({
column={5}
/>
{/* 文件列表 */}
{/* 相似数据集 */}
<div className="mt-8">
<div className="flex items-center justify-between mb-3">
<h2 className="text-base font-semibold"></h2>
{similarTagsSummary && (
<span className="text-xs text-gray-500">
{similarTagsSummary}
</span>
)}
</div>
<Table
size="small"
rowKey="id"
columns={similarColumns}
dataSource={similarDatasets}
loading={similarDatasetsLoading}
pagination={false}
locale={{
emptyText: similarTags?.length
? "暂无相似数据集"
: "当前数据集未设置标签",
}}
/>
</div>
{/* 文件列表 */}
<div className="flex items-center justify-between mt-8 mb-2">
<h2 className="text-base font-semibold"></h2>
<div className="flex items-center gap-2">

View File

@@ -1,4 +1,8 @@
import { get, post, put, del, download } from "@/utils/request";
import { get, post, put, del, download } from "@/utils/request";
// Loosely-typed query parameters handed to the request helpers by the wrappers in this file.
type RequestParams = Record<string, unknown>;
// Loosely-typed request body handed to the request helpers by the wrappers in this file.
type RequestPayload = Record<string, unknown>;
// Extra request options spread into the chunk-upload request config.
type UploadChunkConfig = Record<string, unknown>;
// 数据集统计接口
export function getDatasetStatisticsUsingGet() {
@@ -10,24 +14,35 @@ export function queryDatasetStatisticsByIdUsingGet(id: string | number) {
}
// 查询数据集列表
export function queryDatasetsUsingGet(params?: any) {
return get("/api/data-management/datasets", params);
}
/**
 * Queries the dataset list.
 *
 * @param params Optional query parameters, passed to `get` unchanged.
 * @returns Whatever the shared `get` helper returns for this endpoint.
 */
export function queryDatasetsUsingGet(params?: RequestParams) {
  const endpoint = "/api/data-management/datasets";
  return get(endpoint, params);
}
/**
 * Creates a dataset.
 *
 * @param data Request body, passed to `post` unchanged.
 * @returns Whatever the shared `post` helper returns for this endpoint.
 */
export function createDatasetUsingPost(data: RequestPayload) {
  const endpoint = "/api/data-management/datasets";
  return post(endpoint, data);
}
// 创建数据集
export function createDatasetUsingPost(data: any) {
return post("/api/data-management/datasets", data);
}
// 根据ID获取数据集详情
export function queryDatasetByIdUsingGet(id: string | number) {
return get(`/api/data-management/datasets/${id}`);
}
/**
 * Fetches the details of a dataset by its id.
 *
 * @param id Dataset id, interpolated into the request path.
 * @returns Whatever the shared `get` helper returns for this endpoint.
 */
export function queryDatasetByIdUsingGet(id: string | number) {
  const endpoint = `/api/data-management/datasets/${id}`;
  return get(endpoint);
}
/**
 * Fetches the datasets similar to the given dataset.
 *
 * @param id Dataset id, interpolated into the request path.
 * @param params Optional query parameters; `limit` is the requested number of results.
 * @returns Whatever the shared `get` helper returns for this endpoint.
 */
export function querySimilarDatasetsUsingGet(
  id: string | number,
  params?: { limit?: number }
) {
  const endpoint = `/api/data-management/datasets/${id}/similar`;
  return get(endpoint, params);
}
// 更新数据集
export function updateDatasetByIdUsingPut(id: string | number, data: any) {
return put(`/api/data-management/datasets/${id}`, data);
}
/**
 * Updates a dataset.
 *
 * @param id Dataset id, interpolated into the request path.
 * @param data Request body, passed to `put` unchanged.
 * @returns Whatever the shared `put` helper returns for this endpoint.
 */
export function updateDatasetByIdUsingPut(
  id: string | number,
  data: RequestPayload
) {
  const endpoint = `/api/data-management/datasets/${id}`;
  return put(endpoint, data);
}
// 删除数据集
export function deleteDatasetByIdUsingDelete(id: string | number) {
@@ -40,19 +55,28 @@ export function downloadDatasetUsingGet(id: string | number) {
}
// 验证数据集
export function validateDatasetUsingPost(id: string | number, data?: any) {
return post(`/api/data-management/datasets/${id}/validate`, data);
}
/**
 * Calls the validate endpoint of a dataset.
 *
 * @param id Dataset id, interpolated into the request path.
 * @param data Optional request body, passed to `post` unchanged.
 * @returns Whatever the shared `post` helper returns for this endpoint.
 */
export function validateDatasetUsingPost(
  id: string | number,
  data?: RequestPayload
) {
  const endpoint = `/api/data-management/datasets/${id}/validate`;
  return post(endpoint, data);
}
// 获取数据集文件列表
export function queryDatasetFilesUsingGet(id: string | number, params?: any) {
return get(`/api/data-management/datasets/${id}/files`, params);
}
/**
 * Queries the file list of a dataset.
 *
 * @param id Dataset id, interpolated into the request path.
 * @param params Optional query parameters, passed to `get` unchanged.
 * @returns Whatever the shared `get` helper returns for this endpoint.
 */
export function queryDatasetFilesUsingGet(
  id: string | number,
  params?: RequestParams
) {
  const endpoint = `/api/data-management/datasets/${id}/files`;
  return get(endpoint, params);
}
// 上传数据集文件
export function uploadDatasetFileUsingPost(id: string | number, data: any) {
return post(`/api/data-management/datasets/${id}/files`, data);
}
/**
 * Uploads a file to a dataset.
 *
 * @param id Dataset id, interpolated into the request path.
 * @param data Request body, passed to `post` unchanged.
 * @returns Whatever the shared `post` helper returns for this endpoint.
 */
export function uploadDatasetFileUsingPost(
  id: string | number,
  data: RequestPayload
) {
  const endpoint = `/api/data-management/datasets/${id}/files`;
  return post(endpoint, data);
}
// 新建数据集文件夹
export function createDatasetDirectoryUsingPost(
@@ -104,34 +128,40 @@ export function deleteDatasetFileUsingDelete(
}
// 文件预览
export function previewDatasetUsingGet(id: string | number, params?: any) {
return get(`/api/data-management/datasets/${id}/preview`, params);
}
/**
 * Calls the preview endpoint of a dataset.
 *
 * @param id Dataset id, interpolated into the request path.
 * @param params Optional query parameters, passed to `get` unchanged.
 * @returns Whatever the shared `get` helper returns for this endpoint.
 */
export function previewDatasetUsingGet(
  id: string | number,
  params?: RequestParams
) {
  const endpoint = `/api/data-management/datasets/${id}/preview`;
  return get(endpoint, params);
}
// 获取数据集标签
export function queryDatasetTagsUsingGet(params?: any) {
return get("/api/data-management/tags", params);
}
/**
 * Queries the dataset tags.
 *
 * @param params Optional query parameters, passed to `get` unchanged.
 * @returns Whatever the shared `get` helper returns for this endpoint.
 */
export function queryDatasetTagsUsingGet(params?: RequestParams) {
  const endpoint = "/api/data-management/tags";
  return get(endpoint, params);
}
// 创建数据集标签
export function createDatasetTagUsingPost(data: any) {
return post("/api/data-management/tags", data);
}
/**
 * Creates a dataset tag.
 *
 * @param data Request body, passed to `post` unchanged.
 * @returns Whatever the shared `post` helper returns for this endpoint.
 */
export function createDatasetTagUsingPost(data: RequestPayload) {
  const endpoint = "/api/data-management/tags";
  return post(endpoint, data);
}
// 更新数据集标签
export function updateDatasetTagUsingPut(data: any) {
return put(`/api/data-management/tags`, data);
}
/**
 * Updates a dataset tag.
 *
 * @param data Request body, passed to `put` unchanged.
 * @returns Whatever the shared `put` helper returns for this endpoint.
 */
export function updateDatasetTagUsingPut(data: RequestPayload) {
  const endpoint = `/api/data-management/tags`;
  return put(endpoint, data);
}
// 删除数据集标签
export function deleteDatasetTagUsingDelete(data: any) {
return del(`/api/data-management/tags`, data);
}
/**
 * Deletes a dataset tag.
 *
 * @param data Payload passed to `del` unchanged.
 * @returns Whatever the shared `del` helper returns for this endpoint.
 */
export function deleteDatasetTagUsingDelete(data: RequestPayload) {
  const endpoint = `/api/data-management/tags`;
  return del(endpoint, data);
}
// 数据集质量检查
export function checkDatasetQualityUsingPost(id: string | number, data?: any) {
return post(`/api/data-management/datasets/${id}/quality-check`, data);
}
/**
 * Calls the quality-check endpoint of a dataset.
 *
 * @param id Dataset id, interpolated into the request path.
 * @param data Optional request body, passed to `post` unchanged.
 * @returns Whatever the shared `post` helper returns for this endpoint.
 */
export function checkDatasetQualityUsingPost(
  id: string | number,
  data?: RequestPayload
) {
  const endpoint = `/api/data-management/datasets/${id}/quality-check`;
  return post(endpoint, data);
}
// 获取数据集质量报告
export function getDatasetQualityReportUsingGet(id: string | number) {
@@ -139,9 +169,12 @@ export function getDatasetQualityReportUsingGet(id: string | number) {
}
// 数据集分析
export function analyzeDatasetUsingPost(id: string | number, data?: any) {
return post(`/api/data-management/datasets/${id}/analyze`, data);
}
/**
 * Calls the analyze endpoint of a dataset.
 *
 * @param id Dataset id, interpolated into the request path.
 * @param data Optional request body, passed to `post` unchanged.
 * @returns Whatever the shared `post` helper returns for this endpoint.
 */
export function analyzeDatasetUsingPost(
  id: string | number,
  data?: RequestPayload
) {
  const endpoint = `/api/data-management/datasets/${id}/analyze`;
  return post(endpoint, data);
}
// 获取数据集分析结果
export function getDatasetAnalysisUsingGet(id: string | number) {
@@ -149,27 +182,36 @@ export function getDatasetAnalysisUsingGet(id: string | number) {
}
// 导出数据集
export function exportDatasetUsingPost(id: string | number, data: any) {
return post(`/api/data-management/datasets/${id}/export`, data);
}
/**
 * Calls the export endpoint of a dataset.
 *
 * @param id Dataset id, interpolated into the request path.
 * @param data Request body, passed to `post` unchanged.
 * @returns Whatever the shared `post` helper returns for this endpoint.
 */
export function exportDatasetUsingPost(
  id: string | number,
  data: RequestPayload
) {
  const endpoint = `/api/data-management/datasets/${id}/export`;
  return post(endpoint, data);
}
// 复制数据集
export function copyDatasetUsingPost(id: string | number, data: any) {
return post(`/api/data-management/datasets/${id}/copy`, data);
}
/**
 * Calls the copy endpoint of a dataset.
 *
 * @param id Dataset id, interpolated into the request path.
 * @param data Request body, passed to `post` unchanged.
 * @returns Whatever the shared `post` helper returns for this endpoint.
 */
export function copyDatasetUsingPost(
  id: string | number,
  data: RequestPayload
) {
  const endpoint = `/api/data-management/datasets/${id}/copy`;
  return post(endpoint, data);
}
// 获取数据集版本列表
export function queryDatasetVersionsUsingGet(
id: string | number,
params?: any
) {
return get(`/api/data-management/datasets/${id}/versions`, params);
}
/**
 * Queries the version list of a dataset.
 *
 * @param id Dataset id, interpolated into the request path.
 * @param params Optional query parameters, passed to `get` unchanged.
 * @returns Whatever the shared `get` helper returns for this endpoint.
 */
export function queryDatasetVersionsUsingGet(
  id: string | number,
  params?: RequestParams
) {
  const endpoint = `/api/data-management/datasets/${id}/versions`;
  return get(endpoint, params);
}
// 创建数据集版本
export function createDatasetVersionUsingPost(id: string | number, data: any) {
return post(`/api/data-management/datasets/${id}/versions`, data);
}
/**
 * Creates a version of a dataset.
 *
 * @param id Dataset id, interpolated into the request path.
 * @param data Request body, passed to `post` unchanged.
 * @returns Whatever the shared `post` helper returns for this endpoint.
 */
export function createDatasetVersionUsingPost(
  id: string | number,
  data: RequestPayload
) {
  const endpoint = `/api/data-management/datasets/${id}/versions`;
  return post(endpoint, data);
}
// 切换数据集版本
export function switchDatasetVersionUsingPut(
@@ -193,25 +235,29 @@ export function deleteDatasetVersionUsingDelete(
* 文件上传相关接口
*/
export function preUploadUsingPost(id: string | number, data: any) {
return post(
`/api/data-management/datasets/${id}/files/upload/pre-upload`,
data
);
}
export function cancelUploadUsingPut(id) {
return put(
`/api/data-management/datasets/upload/cancel-upload/${id}`,
{},
{ showLoading: false }
);
}
export function uploadFileChunkUsingPost(id: string | number, params, config) {
return post(
`/api/data-management/datasets/${id}/files/upload/chunk`,
params,
/**
 * Calls the pre-upload endpoint of a dataset's file upload flow.
 *
 * @param id Dataset id, interpolated into the request path.
 * @param data Request body, passed to `post` unchanged.
 * @returns Whatever the shared `post` helper returns for this endpoint.
 */
export function preUploadUsingPost(id: string | number, data: RequestPayload) {
  const endpoint = `/api/data-management/datasets/${id}/files/upload/pre-upload`;
  return post(endpoint, data);
}
/**
 * Calls the cancel-upload endpoint with an empty body.
 *
 * @param id Identifier interpolated into the request path — presumably the
 *   upload request id rather than a dataset id; confirm against the caller.
 * @returns Whatever the shared `put` helper returns for this endpoint.
 */
export function cancelUploadUsingPut(id: string | number) {
  const endpoint = `/api/data-management/datasets/upload/cancel-upload/${id}`;
  // `showLoading: false` is handed to the request helper as an option.
  return put(endpoint, {}, { showLoading: false });
}
export function uploadFileChunkUsingPost(
id: string | number,
params: RequestPayload,
config: UploadChunkConfig
) {
return post(
`/api/data-management/datasets/${id}/files/upload/chunk`,
params,
{
showLoading: false,
...config,

View File

@@ -205,7 +205,7 @@ const KnowledgeSetDetail = () => {
return;
}
setReadItemId(record.id);
setReadTitle(record.title || "知识条目");
setReadTitle("知识条目");
if (!record.sourceDatasetId || !record.sourceFileId) {
const content = record.content || "";

View File

@@ -170,8 +170,8 @@ export default function KnowledgeItemEditor({
{!isCreateMode && isFileItem && (
<Form.Item label="文件">
<div className="flex items-center gap-2">
<span className="truncate" title={data?.sourceFileId || data?.title}>
{data?.sourceFileId || data?.title || "-"}
<span className="truncate" title={data?.sourceFileId || "-"}>
{data?.sourceFileId || "-"}
</span>
<Button size="small" onClick={handleDownloadFile} disabled={readOnly}>

View File

@@ -92,7 +92,6 @@ export type KnowledgeSetView = {
export type KnowledgeItemView = {
id: string;
setId: string;
title: string;
content: string;
contentType: KnowledgeContentType;
status: KnowledgeStatusMeta | null;
@@ -140,7 +139,6 @@ export function mapKnowledgeItem(data: KnowledgeItem): KnowledgeItemView {
return {
id: data.id,
setId: data.setId,
title: data.title,
content: data.content,
contentType: data.contentType,
status: knowledgeStatusMap[data.status] ?? null,

View File

@@ -48,7 +48,6 @@ export interface KnowledgeSet {
export interface KnowledgeItem {
id: string;
setId: string;
title: string;
content: string;
contentType: KnowledgeContentType;
status: KnowledgeStatusType;