feat(data-management): 添加数据集相似度推荐功能

- 在DatasetApplicationService中实现getSimilarDatasets方法,支持基于标签匹配的相似数据集推荐
- 新增normalizeSimilarLimit、normalizeTagNames、countSharedTags等辅助方法用于相似度计算
- 在DatasetRepository接口及其实现类中添加findSimilarByTags方法,支持数据库层面的标签匹配查询
- 在DatasetController中暴露/similar REST API端点,支持按需获取相似数据集
- 在前端Overview组件中展示相似数据集表格,包含名称、标签、类型、文件数和更新时间等信息
- 在DatasetDetail页面集成相似数据集获取逻辑,限制默认返回数量为4条
- 移除KnowledgeItem中的冗余title字段,统一使用其他标识信息
- 优化知识管理相关组件中的标题显示逻辑,移除硬编码标题值
This commit is contained in:
2026-01-30 11:43:44 +08:00
parent c51cd2b6e4
commit c221666e67
12 changed files with 481 additions and 98 deletions

View File

@@ -1,8 +1,19 @@
import { App, Button, Descriptions, DescriptionsProps, Modal, Table, Input } from "antd";
import { formatBytes, formatDateTime } from "@/utils/unit";
import { Download, Trash2, Folder, File } from "lucide-react";
import {
App,
Button,
Descriptions,
DescriptionsProps,
Modal,
Table,
Input,
Tag,
} from "antd";
import { formatBytes, formatDateTime } from "@/utils/unit";
import { Download, Trash2, Folder, File } from "lucide-react";
import { datasetTypeMap } from "../../dataset.const";
import type { DatasetFile } from "@/pages/DataManagement/dataset.model";
import type { Dataset, DatasetFile } from "@/pages/DataManagement/dataset.model";
import { Link } from "react-router";
import type { useFilesOperation } from "../useFilesOperation";
type DatasetFileRow = DatasetFile & {
fileSize?: number;
@@ -18,14 +29,29 @@ const PREVIEW_MODAL_WIDTH = {
const PREVIEW_TEXT_FONT_SIZE = 12;
const PREVIEW_TEXT_PADDING = 12;
const PREVIEW_AUDIO_PADDING = 40;
const SIMILAR_TAGS_PREVIEW_LIMIT = 3;
const SIMILAR_DATASET_TAG_PREVIEW_LIMIT = 4;
type OverviewProps = {
dataset: Dataset;
filesOperation: ReturnType<typeof useFilesOperation>;
fetchDataset: () => void;
onUpload?: () => void;
similarDatasets: Dataset[];
similarDatasetsLoading: boolean;
similarTags: string[];
};
export default function Overview({
dataset,
filesOperation,
fetchDataset,
onUpload,
}) {
const { modal, message } = App.useApp();
similarDatasets,
similarDatasetsLoading,
similarTags,
}: OverviewProps) {
const { modal, message } = App.useApp();
const {
fileList,
pagination,
@@ -46,6 +72,82 @@ export default function Overview({
handleDeleteDirectory,
handlePreviewFile,
} = filesOperation;
const similarTagsSummary = (() => {
if (!similarTags || similarTags.length === 0) {
return "";
}
const visibleTags = similarTags.slice(0, SIMILAR_TAGS_PREVIEW_LIMIT);
const hiddenCount = similarTags.length - visibleTags.length;
if (hiddenCount > 0) {
return `${visibleTags.join("、")}${similarTags.length}`;
}
return visibleTags.join("、");
})();
const renderDatasetTags = (
tags?: Array<string | { name?: string; color?: string } | null>
) => {
if (!tags || tags.length === 0) {
return "-";
}
const visibleTags = tags.slice(0, SIMILAR_DATASET_TAG_PREVIEW_LIMIT);
const hiddenCount = tags.length - visibleTags.length;
return (
<div className="flex flex-wrap gap-1">
{visibleTags.map((tag, index) => {
const tagName = typeof tag === "string" ? tag : tag?.name;
if (!tagName) {
return null;
}
const tagColor = typeof tag === "string" ? undefined : tag?.color;
return (
<Tag key={`${tagName}-${index}`} color={tagColor}>
{tagName}
</Tag>
);
})}
{hiddenCount > 0 && <Tag>+{hiddenCount}</Tag>}
</div>
);
};
const similarColumns = [
{
title: "名称",
dataIndex: "name",
key: "name",
render: (_: string, record: Dataset) => (
<Link to={`/data/management/detail/${record.id}`}>{record.name}</Link>
),
},
{
title: "标签",
dataIndex: "tags",
key: "tags",
render: (tags: Array<string | { name?: string; color?: string }>) =>
renderDatasetTags(tags),
},
{
title: "类型",
dataIndex: "datasetType",
key: "datasetType",
width: 120,
render: (_: string, record: Dataset) =>
datasetTypeMap[record.datasetType as keyof typeof datasetTypeMap]?.label ||
"未知",
},
{
title: "文件数",
dataIndex: "fileCount",
key: "fileCount",
width: 120,
render: (value?: number) => value ?? 0,
},
{
title: "更新时间",
dataIndex: "updatedAt",
key: "updatedAt",
width: 180,
},
];
// 基本信息
const items: DescriptionsProps["items"] = [
@@ -265,7 +367,32 @@ export default function Overview({
column={5}
/>
{/* 文件列表 */}
{/* 相似数据集 */}
<div className="mt-8">
<div className="flex items-center justify-between mb-3">
<h2 className="text-base font-semibold"></h2>
{similarTagsSummary && (
<span className="text-xs text-gray-500">
{similarTagsSummary}
</span>
)}
</div>
<Table
size="small"
rowKey="id"
columns={similarColumns}
dataSource={similarDatasets}
loading={similarDatasetsLoading}
pagination={false}
locale={{
emptyText: similarTags?.length
? "暂无相似数据集"
: "当前数据集未设置标签",
}}
/>
</div>
{/* 文件列表 */}
<div className="flex items-center justify-between mt-8 mb-2">
<h2 className="text-base font-semibold"></h2>
<div className="flex items-center gap-2">