fix the ratio task config (#224)

* fix: fix the dataset card icon

* fix: fix the dataset file tag distribution (skip files without tags instead of returning early) and the ratio task filter config

* refactor: change dateRange config from a "latest N days" value to an explicit [start, end] date range
This commit is contained in:
hefanli
2026-01-05 17:02:28 +08:00
committed by GitHub
parent 3f1ad6a872
commit a15a6134ff
6 changed files with 44 additions and 37 deletions

View File

@@ -78,7 +78,7 @@ public interface DatasetConverter {
for (DatasetFile datasetFile : datasetFiles) { for (DatasetFile datasetFile : datasetFiles) {
List<FileTag> tags = datasetFile.analyzeTag(); List<FileTag> tags = datasetFile.analyzeTag();
if (CollectionUtils.isEmpty(tags)) { if (CollectionUtils.isEmpty(tags)) {
return distribution; continue;
} }
for (FileTag tag : tags) { for (FileTag tag : tags) {
Map<String, Long> tagValueMap = distribution.getOrDefault(tag.getFromName(), new HashMap<>()); Map<String, Long> tagValueMap = distribution.getOrDefault(tag.getFromName(), new HashMap<>());

View File

@@ -17,6 +17,7 @@ import {
FileImage, FileImage,
FileText, FileText,
Video, Video,
Film,
FileCode, FileCode,
MessageCircleMore, MessageCircleMore,
ImagePlus, ImagePlus,
@@ -45,7 +46,7 @@ export const datasetTypeMap: Record<
label: "文本", label: "文本",
order: 1, order: 1,
icon: ScanText, icon: ScanText,
iconColor: "blue", iconColor: "#A78BFA",
children: [ children: [
DatasetSubType.TEXT_DOCUMENT, DatasetSubType.TEXT_DOCUMENT,
DatasetSubType.TEXT_WEB, DatasetSubType.TEXT_WEB,
@@ -58,7 +59,7 @@ export const datasetTypeMap: Record<
label: "图像", label: "图像",
order: 2, order: 2,
icon: Image, icon: Image,
iconColor: "green", iconColor: "#38BDF8",
children: [DatasetSubType.IMAGE_IMAGE, DatasetSubType.IMAGE_CAPTION], children: [DatasetSubType.IMAGE_IMAGE, DatasetSubType.IMAGE_CAPTION],
description: "用于处理和分析图像数据的数据集", description: "用于处理和分析图像数据的数据集",
}, },
@@ -67,7 +68,7 @@ export const datasetTypeMap: Record<
label: "音频", label: "音频",
order: 3, order: 3,
icon: Music, icon: Music,
iconColor: "orange", iconColor: "#F59E0B",
children: [DatasetSubType.AUDIO_AUDIO, DatasetSubType.AUDIO_JSONL], children: [DatasetSubType.AUDIO_AUDIO, DatasetSubType.AUDIO_JSONL],
description: "用于处理和分析音频数据的数据集", description: "用于处理和分析音频数据的数据集",
}, },
@@ -75,8 +76,8 @@ export const datasetTypeMap: Record<
value: DatasetType.VIDEO, value: DatasetType.VIDEO,
label: "视频", label: "视频",
order: 3, order: 3,
icon: Video, icon: Film,
iconColor: "purple", iconColor: "#22D3EE",
children: [DatasetSubType.VIDEO_VIDEO, DatasetSubType.VIDEO_JSONL], children: [DatasetSubType.VIDEO_VIDEO, DatasetSubType.VIDEO_JSONL],
description: "用于处理和分析视频数据的数据集", description: "用于处理和分析视频数据的数据集",
}, },
@@ -206,6 +207,7 @@ export function mapDataset(dataset: AnyObject): Dataset {
createdAt: formatDateTime(dataset.createdAt) || "--", createdAt: formatDateTime(dataset.createdAt) || "--",
updatedAt: formatDateTime(dataset?.updatedAt) || "--", updatedAt: formatDateTime(dataset?.updatedAt) || "--",
icon: IconComponent ? <IconComponent className="w-full h-full" /> : <Database />, icon: IconComponent ? <IconComponent className="w-full h-full" /> : <Database />,
iconColor: iconColor,
status: datasetStatusMap[dataset.status], status: datasetStatusMap[dataset.status],
statistics: [ statistics: [
{ label: "文件数", value: dataset.fileCount || 0 }, { label: "文件数", value: dataset.fileCount || 0 },

View File

@@ -7,6 +7,7 @@ import { useNavigate } from "react-router";
import SelectDataset from "@/pages/RatioTask/Create/components/SelectDataset.tsx"; import SelectDataset from "@/pages/RatioTask/Create/components/SelectDataset.tsx";
import BasicInformation from "@/pages/RatioTask/Create/components/BasicInformation.tsx"; import BasicInformation from "@/pages/RatioTask/Create/components/BasicInformation.tsx";
import RatioConfig from "@/pages/RatioTask/Create/components/RatioConfig.tsx"; import RatioConfig from "@/pages/RatioTask/Create/components/RatioConfig.tsx";
import {formatDate} from "@/utils/unit.ts";
export default function CreateRatioTask() { export default function CreateRatioTask() {
const navigate = useNavigate(); const navigate = useNavigate();
@@ -37,10 +38,11 @@ export default function CreateRatioTask() {
} }
const totals = String(values.totalTargetCount); const totals = String(values.totalTargetCount);
const config = ratioTaskForm.ratioConfigs.map((c) => { const config = ratioTaskForm.ratioConfigs.map((c) => {
const dateRange = c.dateRange ? [formatDate(c.dateRange[0]), formatDate(c.dateRange[1])] : []
return { return {
datasetId: c.source, datasetId: c.source,
counts: String(c.quantity ?? 0), counts: String(c.quantity ?? 0),
filterConditions: { label: c.labelFilter, dateRange: String(c.dateRange ?? 0)}, filterConditions: { label: c.labelFilter, dateRange: dateRange},
}; };
}); });

View File

@@ -6,6 +6,7 @@ import {
Select, Select,
Table, Table,
InputNumber, InputNumber,
DatePicker
} from "antd"; } from "antd";
import { BarChart3 } from "lucide-react"; import { BarChart3 } from "lucide-react";
import type { Dataset } from "@/pages/DataManagement/dataset.model.ts"; import type { Dataset } from "@/pages/DataManagement/dataset.model.ts";
@@ -31,7 +32,7 @@ interface RatioConfigItem {
percentage: number; percentage: number;
source: string; // dataset id source: string; // dataset id
labelFilter?: LabelFilter; labelFilter?: LabelFilter;
dateRange?: number; dateRange?: [Date | null, Date | null] | null;
} }
interface RatioConfigProps { interface RatioConfigProps {
@@ -303,20 +304,18 @@ const RatioConfig: FC<RatioConfigProps> = ({
title: "标签更新时间", title: "标签更新时间",
dataIndex: "dateRange", dataIndex: "dateRange",
key: "dateRange", key: "dateRange",
render: (_: any, record: RatioConfigItem) => ( render: (_: any, record: RatioConfigItem) => {
<Select return (
style={{ width: "140px" }} <DatePicker.RangePicker
placeholder="选择标签更新时间" value={record.dateRange as any}
value={record.dateRange} onChange={(date) => {
options={TIME_RANGE_OPTIONS} updateConfig(record.id, { dateRange: date });
}}
placeholder={["开始时间", "结束时间"]}
allowClear allowClear
onChange={(value) =>
updateConfig(record.id, {
dateRange: value || undefined,
})
}
/> />
), );
},
}, },
{ {
title: "数量", title: "数量",

View File

@@ -12,20 +12,25 @@ class LabelFilter(BaseModel):
value: Optional[str] = Field(None, description="标签值") value: Optional[str] = Field(None, description="标签值")
class FilterCondition(BaseModel): class FilterCondition(BaseModel):
date_range: Optional[str] = Field(None, description="数据范围", alias="dateRange") date_range: Optional[List[str]] = Field(None, description="数据范围", alias="dateRange")
label: Optional[LabelFilter] = Field(None, description="标签") label: Optional[LabelFilter] = Field(None, description="标签")
@field_validator("date_range") @field_validator("date_range")
@classmethod @classmethod
def validate_date_range(cls, v: Optional[str]) -> Optional[str]: def validate_date_range(cls, date_range: Optional[List[str]]) -> Optional[List[str]]:
# ensure it's a numeric string if provided # ensure it's a date range if provided
if not v: if not date_range or len(date_range) == 0:
return v return date_range
if len(date_range) != 2:
raise ValueError("date_range must be a list of two date strings: [start, end]")
try: try:
int(v) start = datetime.fromisoformat(date_range[0])
return v end = datetime.fromisoformat(date_range[1])
if start > end:
raise ValueError("date_range start must be earlier than or equal to end")
return date_range
except (ValueError, TypeError) as e: except (ValueError, TypeError) as e:
raise ValueError("date_range must be a numeric string") raise ValueError("date_range items must be ISO date strings (e.g. YYYY-MM-DD)")
class Config: class Config:
# allow population by field name when constructing model programmatically # allow population by field name when constructing model programmatically

View File

@@ -271,13 +271,12 @@ class RatioTaskService:
logger.info(f"start filter file: {file}, conditions: {conditions}") logger.info(f"start filter file: {file}, conditions: {conditions}")
# Check data range condition if provided # Check data range condition if provided
if conditions.date_range: if conditions.date_range and len(conditions.date_range) == 2:
try: try:
from datetime import datetime, timedelta from datetime import datetime, timedelta
data_range_days = int(conditions.date_range) start_at = datetime.fromisoformat(conditions.date_range[0])
if data_range_days > 0: end_at = datetime.fromisoformat(conditions.date_range[1])
cutoff_date = datetime.now() - timedelta(days=data_range_days) if file.tags_updated_at and (file.tags_updated_at < start_at or file.tags_updated_at > end_at):
if file.tags_updated_at and file.tags_updated_at < cutoff_date:
return False return False
except (ValueError, TypeError) as e: except (ValueError, TypeError) as e:
logger.warning(f"Invalid data_range value: {conditions.date_range}", e) logger.warning(f"Invalid data_range value: {conditions.date_range}", e)
@@ -294,7 +293,7 @@ class RatioTaskService:
for tag in all_tags: for tag in all_tags:
if conditions.label.label and tag.get("label") != conditions.label.label: if conditions.label.label and tag.get("label") != conditions.label.label:
continue continue
if conditions.label.value is None: if conditions.label.value is None or len(conditions.label.value) == 0:
return True return True
if tag.get("value") == conditions.label.value: if tag.get("value") == conditions.label.value:
return True return True