fix the ratio task config (#224)

* fix: fix the dataset card icon

* fix: fix the dataset file tag distribution and ratio task

* refactor: change dateRange config from a "latest N days" value to an explicit [start, end] date range
This commit is contained in:
hefanli
2026-01-05 17:02:28 +08:00
committed by GitHub
parent 3f1ad6a872
commit a15a6134ff
6 changed files with 44 additions and 37 deletions

View File

@@ -78,7 +78,7 @@ public interface DatasetConverter {
for (DatasetFile datasetFile : datasetFiles) {
List<FileTag> tags = datasetFile.analyzeTag();
if (CollectionUtils.isEmpty(tags)) {
return distribution;
continue;
}
for (FileTag tag : tags) {
Map<String, Long> tagValueMap = distribution.getOrDefault(tag.getFromName(), new HashMap<>());

View File

@@ -17,6 +17,7 @@ import {
FileImage,
FileText,
Video,
Film,
FileCode,
MessageCircleMore,
ImagePlus,
@@ -45,7 +46,7 @@ export const datasetTypeMap: Record<
label: "文本",
order: 1,
icon: ScanText,
iconColor: "blue",
iconColor: "#A78BFA",
children: [
DatasetSubType.TEXT_DOCUMENT,
DatasetSubType.TEXT_WEB,
@@ -58,7 +59,7 @@ export const datasetTypeMap: Record<
label: "图像",
order: 2,
icon: Image,
iconColor: "green",
iconColor: "#38BDF8",
children: [DatasetSubType.IMAGE_IMAGE, DatasetSubType.IMAGE_CAPTION],
description: "用于处理和分析图像数据的数据集",
},
@@ -67,7 +68,7 @@ export const datasetTypeMap: Record<
label: "音频",
order: 3,
icon: Music,
iconColor: "orange",
iconColor: "#F59E0B",
children: [DatasetSubType.AUDIO_AUDIO, DatasetSubType.AUDIO_JSONL],
description: "用于处理和分析音频数据的数据集",
},
@@ -75,8 +76,8 @@ export const datasetTypeMap: Record<
value: DatasetType.VIDEO,
label: "视频",
order: 3,
icon: Video,
iconColor: "purple",
icon: Film,
iconColor: "#22D3EE",
children: [DatasetSubType.VIDEO_VIDEO, DatasetSubType.VIDEO_JSONL],
description: "用于处理和分析视频数据的数据集",
},
@@ -206,6 +207,7 @@ export function mapDataset(dataset: AnyObject): Dataset {
createdAt: formatDateTime(dataset.createdAt) || "--",
updatedAt: formatDateTime(dataset?.updatedAt) || "--",
icon: IconComponent ? <IconComponent className="w-full h-full" /> : <Database />,
iconColor: iconColor,
status: datasetStatusMap[dataset.status],
statistics: [
{ label: "文件数", value: dataset.fileCount || 0 },

View File

@@ -7,6 +7,7 @@ import { useNavigate } from "react-router";
import SelectDataset from "@/pages/RatioTask/Create/components/SelectDataset.tsx";
import BasicInformation from "@/pages/RatioTask/Create/components/BasicInformation.tsx";
import RatioConfig from "@/pages/RatioTask/Create/components/RatioConfig.tsx";
import {formatDate} from "@/utils/unit.ts";
export default function CreateRatioTask() {
const navigate = useNavigate();
@@ -37,10 +38,11 @@ export default function CreateRatioTask() {
}
const totals = String(values.totalTargetCount);
const config = ratioTaskForm.ratioConfigs.map((c) => {
const dateRange = c.dateRange ? [formatDate(c.dateRange[0]), formatDate(c.dateRange[1])] : []
return {
datasetId: c.source,
counts: String(c.quantity ?? 0),
filterConditions: { label: c.labelFilter, dateRange: String(c.dateRange ?? 0)},
filterConditions: { label: c.labelFilter, dateRange: dateRange},
};
});

View File

@@ -6,6 +6,7 @@ import {
Select,
Table,
InputNumber,
DatePicker
} from "antd";
import { BarChart3 } from "lucide-react";
import type { Dataset } from "@/pages/DataManagement/dataset.model.ts";
@@ -31,7 +32,7 @@ interface RatioConfigItem {
percentage: number;
source: string; // dataset id
labelFilter?: LabelFilter;
dateRange?: number;
dateRange?: [Date | null, Date | null] | null;
}
interface RatioConfigProps {
@@ -303,20 +304,18 @@ const RatioConfig: FC<RatioConfigProps> = ({
title: "标签更新时间",
dataIndex: "dateRange",
key: "dateRange",
render: (_: any, record: RatioConfigItem) => (
<Select
style={{ width: "140px" }}
placeholder="选择标签更新时间"
value={record.dateRange}
options={TIME_RANGE_OPTIONS}
allowClear
onChange={(value) =>
updateConfig(record.id, {
dateRange: value || undefined,
})
}
/>
),
render: (_: any, record: RatioConfigItem) => {
return (
<DatePicker.RangePicker
value={record.dateRange as any}
onChange={(date) => {
updateConfig(record.id, { dateRange: date });
}}
placeholder={["开始时间", "结束时间"]}
allowClear
/>
);
},
},
{
title: "数量",

View File

@@ -12,20 +12,25 @@ class LabelFilter(BaseModel):
value: Optional[str] = Field(None, description="标签值")
class FilterCondition(BaseModel):
date_range: Optional[str] = Field(None, description="数据范围", alias="dateRange")
date_range: Optional[List[str]] = Field(None, description="数据范围", alias="dateRange")
label: Optional[LabelFilter] = Field(None, description="标签")
@field_validator("date_range")
@classmethod
def validate_date_range(cls, v: Optional[str]) -> Optional[str]:
# ensure it's a numeric string if provided
if not v:
return v
def validate_date_range(cls, date_range: Optional[List[str]]) -> Optional[List[str]]:
# ensure it's a date range if provided
if not date_range or len(date_range) == 0:
return date_range
if len(date_range) != 2:
raise ValueError("date_range must be a list of two date strings: [start, end]")
try:
int(v)
return v
start = datetime.fromisoformat(date_range[0])
end = datetime.fromisoformat(date_range[1])
if start > end:
raise ValueError("date_range start must be earlier than or equal to end")
return date_range
except (ValueError, TypeError) as e:
raise ValueError("date_range must be a numeric string")
raise ValueError("date_range items must be ISO date strings (e.g. YYYY-MM-DD)")
class Config:
# allow population by field name when constructing model programmatically

View File

@@ -271,14 +271,13 @@ class RatioTaskService:
logger.info(f"start filter file: {file}, conditions: {conditions}")
# Check data range condition if provided
if conditions.date_range:
if conditions.date_range and len(conditions.date_range) == 2:
try:
from datetime import datetime, timedelta
data_range_days = int(conditions.date_range)
if data_range_days > 0:
cutoff_date = datetime.now() - timedelta(days=data_range_days)
if file.tags_updated_at and file.tags_updated_at < cutoff_date:
return False
start_at = datetime.fromisoformat(conditions.date_range[0])
end_at = datetime.fromisoformat(conditions.date_range[1])
if file.tags_updated_at and (file.tags_updated_at < start_at or file.tags_updated_at > end_at):
return False
except (ValueError, TypeError) as e:
logger.warning(f"Invalid data_range value: {conditions.date_range}", e)
return False
@@ -294,7 +293,7 @@ class RatioTaskService:
for tag in all_tags:
if conditions.label.label and tag.get("label") != conditions.label.label:
continue
if conditions.label.value is None:
if conditions.label.value is None or len(conditions.label.value) == 0:
return True
if tag.get("value") == conditions.label.value:
return True