diff --git a/frontend/src/pages/SynthesisTask/CreateTask.tsx b/frontend/src/pages/SynthesisTask/CreateTask.tsx index 5c00a2c..db59b2b 100644 --- a/frontend/src/pages/SynthesisTask/CreateTask.tsx +++ b/frontend/src/pages/SynthesisTask/CreateTask.tsx @@ -1,7 +1,7 @@ import { useEffect, useState } from "react"; import type { Dataset, DatasetFile } from "@/pages/DataManagement/dataset.model"; -import { Steps, Card, Select, Input, Checkbox, Button, Form, message } from "antd"; -import { Eye, ArrowLeft, ArrowRight, Play, Search, MoreHorizontal } from "lucide-react"; +import { Steps, Card, Select, Input, Button, Form, message, Tag, Tooltip, InputNumber } from "antd"; +import { Eye, ArrowLeft, ArrowRight, Play, Search, Sparkles, Brain, Layers } from "lucide-react"; import { Link, useNavigate } from "react-router"; import { queryDatasetsUsingGet } from "../DataManagement/dataset.api"; import DatasetFileTransfer from "@/components/business/DatasetFileTransfer"; @@ -31,13 +31,18 @@ export default function SynthesisTaskCreate() { const [selectedFiles, setSelectedFiles] = useState([]); const [selectedMap, setSelectedMap] = useState>({}); const [selectedDataset, setSelectedDataset] = useState(null); + // 当前选中的模板类型(QA / COT),用于高亮展示 const [selectedSynthesisTypes, setSelectedSynthesisTypes] = useState(["qa"]); const [taskType, setTaskType] = useState<"qa" | "cot">("qa"); - const [promptTemplate, setPromptTemplate] = useState(""); + const [questionPrompt, setQuestionPrompt] = useState(""); + const [answerPrompt, setAnswerPrompt] = useState(""); const [submitting, setSubmitting] = useState(false); const [modelOptions, setModelOptions] = useState<{ label: string; value: string }[]>([]); const [modelsLoading, setModelsLoading] = useState(false); - const [selectedModel, setSelectedModel] = useState(undefined); + const [questionModelId, setQuestionModelId] = useState(undefined); + const [answerModelId, setAnswerModelId] = useState(undefined); + + // 文本切片配置 const [sliceConfig, setSliceConfig] = useState({ processType: "DEFAULT_CHUNK" as | "DEFAULT_CHUNK" @@ -45,10 +50,23 @@ export default function SynthesisTaskCreate() { | "PARAGRAPH_CHUNK" | "FIXED_LENGTH_CHUNK" | "CUSTOM_SEPARATOR_CHUNK", - chunkSize: 500, - overlapSize: 50, + chunkSize: 3000, + overlapSize: 100, delimiter: "", }); + + // 问题/答案合成配置(与后端 question_synth_config / answer_synth_config 对齐) + const [questionConfig, setQuestionConfig] = useState({ + number: 1, + temperature: 0.7, + }); + const [answerConfig, setAnswerConfig] = useState({ + // 答案侧不再需要 number,只保留温度 + temperature: 0.7, + }); + // 合成总数上限,默认 5000 + const [maxQaPairs, setMaxQaPairs] = useState(5000); + const sliceOptions = [ { label: "默认分块", value: "DEFAULT_CHUNK" }, { label: "按章节分块", value: "CHAPTER_CHUNK" }, @@ -62,33 +80,43 @@ export default function SynthesisTaskCreate() { return data; }; - const fetchPrompt = async (type: "qa" | "cot") => { + // 问题 Prompt:固定使用 QUESTION 类型获取 + const fetchQuestionPrompt = async () => { try { - const synthTypeParam = type.toUpperCase(); - const res = await getPromptByTypeUsingGet(synthTypeParam); + const res = await getPromptByTypeUsingGet("QUESTION"); const prompt = typeof res === "string" ? res : (res as { data?: string })?.data ?? ""; - setPromptTemplate(prompt || ""); + setQuestionPrompt(prompt || ""); } catch (e) { console.error(e); - message.error("获取提示词模板失败"); - setPromptTemplate(""); + message.error("获取问题 Prompt 模板失败"); + setQuestionPrompt(""); } }; - useEffect(() => { - fetchDatasets(); - }, []); - - useEffect(() => { - fetchPrompt(taskType); - }, [taskType]); + // 答案 Prompt:根据当前任务类型获取 QA/COT 模板 + const fetchAnswerPrompt = async (type: "qa" | "cot") => { + try { + const synthTypeParam = type === "qa" ? "QA" : "COT"; + const res = await getPromptByTypeUsingGet(synthTypeParam); + const prompt = typeof res === "string" ? res : (res as { data?: string })?.data ?? ""; + setAnswerPrompt(prompt || ""); + } catch (e) { + console.error(e); + message.error("获取答案 Prompt 模板失败"); + setAnswerPrompt(""); + } + }; + // 拉取模型列表,仅保留 CHAT 模型 useEffect(() => { const loadModels = async () => { setModelsLoading(true); try { const { data } = await queryModelListUsingGet({ page: 0, size: 1000 }); - const options = (data?.content || []).map((model: ModelI) => ({ + const chatModels: ModelI[] = (data?.content || []).filter( + (model: ModelI) => model.type === "CHAT" + ); + const options = chatModels.map((model) => ({ label: `${model.modelName} (${model.provider})`, value: model.id, })); @@ -102,11 +130,22 @@ export default function SynthesisTaskCreate() { loadModels(); }, []); + // 默认选中第一个 CHAT 模型作为问题/答案模型 useEffect(() => { - if (!selectedModel && modelOptions.length > 0) { - setSelectedModel(modelOptions[0].value); + if (modelOptions.length > 0) { + setQuestionModelId((prev) => prev ?? modelOptions[0].value); + setAnswerModelId((prev) => prev ?? modelOptions[0].value); } - }, [modelOptions, selectedModel]); + }, [modelOptions]); + + useEffect(() => { + fetchDatasets(); + }, []); + + useEffect(() => { + fetchQuestionPrompt(); + fetchAnswerPrompt(taskType); + }, [taskType]); // 表单数据 const [formValues, setFormValues] = useState({ @@ -131,13 +170,12 @@ export default function SynthesisTaskCreate() { const handleCreateTask = async () => { try { const values = (await form.validateFields()) as CreateTaskFormValues; - // precise validation if (!(taskType === "qa" || taskType === "cot")) { message.error("请选择一个合成类型"); return; } - if (!selectedModel) { - message.error("请选择模型"); + if (!questionModelId || !answerModelId) { + message.error("请选择问题和答案使用的模型"); return; } if (selectedFiles.length === 0) { @@ -145,25 +183,42 @@ export default function SynthesisTaskCreate() { return; } - // 构造后端要求的参数格式 - const payload: Record = { - name: values.name || form.getFieldValue("name"), - model_id: selectedModel, - source_file_id: selectedFiles, + const synthConfig: Record = { text_split_config: { chunk_size: sliceConfig.chunkSize, chunk_overlap: sliceConfig.overlapSize, }, - synthesis_config: { - prompt_template: promptTemplate, + question_synth_config: { + model_id: questionModelId, + prompt_template: questionPrompt, + number: questionConfig.number, + temperature: questionConfig.temperature, }, - synthesis_type: taskType === "qa" ? "QA" : "COT", + answer_synth_config: { + model_id: answerModelId, + prompt_template: answerPrompt, + temperature: answerConfig.temperature, + }, + max_qa_pairs: typeof maxQaPairs === "number" && maxQaPairs > 0 ? maxQaPairs : undefined, }; - // 只有在有真实内容时携带 description,避免强制传空字符串 - const desc = values.description ?? form.getFieldValue("description"); - if (typeof desc === "string" && desc.trim().length > 0) { - payload.description = desc.trim(); + const payload: Record = { + name: values.name || form.getFieldValue("name"), + description: values.description ?? form.getFieldValue("description"), + synthesis_type: taskType === "qa" ? "QA" : "COT", + source_file_id: selectedFiles, + synth_config: synthConfig, + }; + + // 清洗 description:空字符串转为 undefined,让后端用 validator 处理为 None + const desc = payload.description; + if (typeof desc === "string" && desc.trim().length === 0) { + delete payload.description; + } + + // 如果未设置 max_qa_pairs,则从 synth_config 中移除该字段,避免传递 undefined + if (synthConfig.max_qa_pairs === undefined) { + delete (synthConfig as { max_qa_pairs?: number }).max_qa_pairs; } setSubmitting(true); @@ -187,25 +242,43 @@ export default function SynthesisTaskCreate() { return; } console.error(error); - message.error((error instanceof Error ? error.message : "合成任务创建失败")); + message.error(error instanceof Error ? error.message : "合成任务创建失败"); } finally { setSubmitting(false); } }; - // 仅两个一级类型,无二级目录 - const synthesisTypes = [ - { id: "qa", name: "生成问答对" }, - { id: "cot", name: "生成COT链式推理" }, - ] as const; + // 仅两个一级类型,无二级目录 -> 扩展为模板配置 + const synthesisTemplates = [ + { + id: "sft-qa", + type: "qa" as const, + title: "SFT 问答数据合成", + subtitle: "从长文档自动生成高质量问答样本", + badge: "推荐", + description: + "适用于构建监督微调(SFT)问答数据集,支持从知识库或长文档中抽取关键问答对。", + colorClass: "from-sky-500/10 via-sky-400/5 to-transparent", + borderClass: "border-sky-100 hover:border-sky-300", + icon: Sparkles, + }, + { + id: "cot-reasoning", + type: "cot" as const, + title: "COT 链式推理合成", + subtitle: "一步步推理过程与最终答案", + badge: "推理增强", + description: + "生成包含模型推理中间过程的 COT 数据,用于提升模型的复杂推理和解释能力。", + colorClass: "from-violet-500/10 via-violet-400/5 to-transparent", + borderClass: "border-violet-100 hover:border-violet-300", + icon: Brain, + }, + ]; - const handleSynthesisTypeSelect = (typeId: "qa" | "cot") => { - setSelectedSynthesisTypes((prev) => { - const next = prev.includes(typeId) ? [] : [typeId]; - if (next[0] === "qa") setTaskType("qa"); - if (next[0] === "cot") setTaskType("cot"); - return next; - }); + const handleTemplateClick = (tpl: (typeof synthesisTemplates)[number]) => { + setTaskType(tpl.type); + setSelectedSynthesisTypes([tpl.type]); }; useEffect(() => { @@ -247,120 +320,374 @@ export default function SynthesisTaskCreate() { if (createStep === 2) { return ( -
-
- {/* 左侧合成指令(仅两个一级类型,单选) */} +
+
+ {/* 左侧合成指令模板区:占 1/3 宽度 */}
- -

合成指令(仅支持单选)

-
-
- - + +
+
+

+ + 合成指令模板 +

+

+ 从左侧选择一个模板,我们会自动为你填充合适的 Prompt 与合成策略。 +

+ + 单选 +
-
- {synthesisTypes.map((type) => ( -
handleSynthesisTypeSelect(type.id)} - > - handleSynthesisTypeSelect(type.id)} - /> - {type.name} - -
- ))} + +
+
+ + +
+ +
+ {synthesisTemplates.map((tpl) => { + const Icon = tpl.icon; + const active = selectedSynthesisTypes.includes(tpl.type); + + return ( +
handleTemplateClick(tpl)} + className={`group relative rounded-xl border p-2.5 text-xs transition-all duration-200 cursor-pointer bg-white/80 hover:bg-white/100 ${ + tpl.borderClass + } ${ + active + ? "ring-1 ring-offset-1 ring-blue-500/60 border-blue-400/70 shadow-sm bg-gradient-to-r " + + tpl.colorClass + : "border-slate-100 hover:shadow-sm" + }`} + > +
+
+ +
+
+
+ + {tpl.title} + + {tpl.badge && ( + + {tpl.badge} + + )} +
+

+ {tpl.subtitle} +

+

+ {tpl.description} +

+
+
+ +
+ +
+ {active ? "✓" : ""} +
+
+
+
+ ); + })} +
- {/* 右侧合成配置 */} + {/* 右侧合成配置:占 2/3 宽度 */}
- -
-

合成配置

+ +
+
+

+ + 合成配置 +

+

+ 根据左侧模板自动带出配置,你也可以在此基础上进行微调。 +

+
- + + +
- {/* 切片配置 */} - + {/* 步骤说明条 */} +
+ 1 + 设置合成总数 + / + 2 + 配置文本切片策略 + / + 3 + 配置问题合成参数 + / + 4 + 配置答案合成参数 +
+ + {/* 1. 合成总数配置 */} +
+
+
+ 1 + 合成总数上限 +
+ 控制整个任务最多生成的 QA 对数量 +
+
+ setMaxQaPairs(typeof v === "number" ? v : undefined)} + /> + 可选项,建议在大规模合成时设置上限 +
+
+ + {/* 2. 文本切片配置 */} +
+
+
+ 2 + 文本切片配置 +
+ 影响上下文长度与召回粒度 +
- 分块策略 + 分块策略 setSliceConfig((p) => ({ ...p, chunkSize: Number(e.target.value) }))} + size="small" />
- 重叠大小 + 重叠大小 setSliceConfig((p) => ({ ...p, overlapSize: Number(e.target.value) }))} + size="small" />
{sliceConfig.processType === "CUSTOM_SEPARATOR_CHUNK" && (
- 自定义分隔符 + 自定义分隔符 setSliceConfig((p) => ({ ...p, delimiter: e.target.value }))} + size="small" />
)} - +
- {/* 模型选择 */} - - 模型选择 - setQuestionModelId(v)} + /> +
+
+ 问题 Prompt 模板 +

+ 用于指导模型如何从切片文本中生成高质量问题,可在保持变量占位符不变的前提下个性化修改。 +