You've already forked DataMate
fix the ratio task config (#224)
* fix: fix the dataset card icon
* fix: fix the dataset file tag distribution and ratio task
* refactor: change dateRange config from latest to start-end
This commit is contained in:
@@ -78,7 +78,7 @@ public interface DatasetConverter {
|
||||
for (DatasetFile datasetFile : datasetFiles) {
|
||||
List<FileTag> tags = datasetFile.analyzeTag();
|
||||
if (CollectionUtils.isEmpty(tags)) {
|
||||
return distribution;
|
||||
continue;
|
||||
}
|
||||
for (FileTag tag : tags) {
|
||||
Map<String, Long> tagValueMap = distribution.getOrDefault(tag.getFromName(), new HashMap<>());
|
||||
|
||||
@@ -17,6 +17,7 @@ import {
|
||||
FileImage,
|
||||
FileText,
|
||||
Video,
|
||||
Film,
|
||||
FileCode,
|
||||
MessageCircleMore,
|
||||
ImagePlus,
|
||||
@@ -45,7 +46,7 @@ export const datasetTypeMap: Record<
|
||||
label: "文本",
|
||||
order: 1,
|
||||
icon: ScanText,
|
||||
iconColor: "blue",
|
||||
iconColor: "#A78BFA",
|
||||
children: [
|
||||
DatasetSubType.TEXT_DOCUMENT,
|
||||
DatasetSubType.TEXT_WEB,
|
||||
@@ -58,7 +59,7 @@ export const datasetTypeMap: Record<
|
||||
label: "图像",
|
||||
order: 2,
|
||||
icon: Image,
|
||||
iconColor: "green",
|
||||
iconColor: "#38BDF8",
|
||||
children: [DatasetSubType.IMAGE_IMAGE, DatasetSubType.IMAGE_CAPTION],
|
||||
description: "用于处理和分析图像数据的数据集",
|
||||
},
|
||||
@@ -67,7 +68,7 @@ export const datasetTypeMap: Record<
|
||||
label: "音频",
|
||||
order: 3,
|
||||
icon: Music,
|
||||
iconColor: "orange",
|
||||
iconColor: "#F59E0B",
|
||||
children: [DatasetSubType.AUDIO_AUDIO, DatasetSubType.AUDIO_JSONL],
|
||||
description: "用于处理和分析音频数据的数据集",
|
||||
},
|
||||
@@ -75,8 +76,8 @@ export const datasetTypeMap: Record<
|
||||
value: DatasetType.VIDEO,
|
||||
label: "视频",
|
||||
order: 3,
|
||||
icon: Video,
|
||||
iconColor: "purple",
|
||||
icon: Film,
|
||||
iconColor: "#22D3EE",
|
||||
children: [DatasetSubType.VIDEO_VIDEO, DatasetSubType.VIDEO_JSONL],
|
||||
description: "用于处理和分析视频数据的数据集",
|
||||
},
|
||||
@@ -206,6 +207,7 @@ export function mapDataset(dataset: AnyObject): Dataset {
|
||||
createdAt: formatDateTime(dataset.createdAt) || "--",
|
||||
updatedAt: formatDateTime(dataset?.updatedAt) || "--",
|
||||
icon: IconComponent ? <IconComponent className="w-full h-full" /> : <Database />,
|
||||
iconColor: iconColor,
|
||||
status: datasetStatusMap[dataset.status],
|
||||
statistics: [
|
||||
{ label: "文件数", value: dataset.fileCount || 0 },
|
||||
|
||||
@@ -7,6 +7,7 @@ import { useNavigate } from "react-router";
|
||||
import SelectDataset from "@/pages/RatioTask/Create/components/SelectDataset.tsx";
|
||||
import BasicInformation from "@/pages/RatioTask/Create/components/BasicInformation.tsx";
|
||||
import RatioConfig from "@/pages/RatioTask/Create/components/RatioConfig.tsx";
|
||||
import {formatDate} from "@/utils/unit.ts";
|
||||
|
||||
export default function CreateRatioTask() {
|
||||
const navigate = useNavigate();
|
||||
@@ -37,10 +38,11 @@ export default function CreateRatioTask() {
|
||||
}
|
||||
const totals = String(values.totalTargetCount);
|
||||
const config = ratioTaskForm.ratioConfigs.map((c) => {
|
||||
const dateRange = c.dateRange ? [formatDate(c.dateRange[0]), formatDate(c.dateRange[1])] : []
|
||||
return {
|
||||
datasetId: c.source,
|
||||
counts: String(c.quantity ?? 0),
|
||||
filterConditions: { label: c.labelFilter, dateRange: String(c.dateRange ?? 0)},
|
||||
filterConditions: { label: c.labelFilter, dateRange: dateRange},
|
||||
};
|
||||
});
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ import {
|
||||
Select,
|
||||
Table,
|
||||
InputNumber,
|
||||
DatePicker
|
||||
} from "antd";
|
||||
import { BarChart3 } from "lucide-react";
|
||||
import type { Dataset } from "@/pages/DataManagement/dataset.model.ts";
|
||||
@@ -31,7 +32,7 @@ interface RatioConfigItem {
|
||||
percentage: number;
|
||||
source: string; // dataset id
|
||||
labelFilter?: LabelFilter;
|
||||
dateRange?: number;
|
||||
dateRange?: [Date | null, Date | null] | null;
|
||||
}
|
||||
|
||||
interface RatioConfigProps {
|
||||
@@ -303,20 +304,18 @@ const RatioConfig: FC<RatioConfigProps> = ({
|
||||
title: "标签更新时间",
|
||||
dataIndex: "dateRange",
|
||||
key: "dateRange",
|
||||
render: (_: any, record: RatioConfigItem) => (
|
||||
<Select
|
||||
style={{ width: "140px" }}
|
||||
placeholder="选择标签更新时间"
|
||||
value={record.dateRange}
|
||||
options={TIME_RANGE_OPTIONS}
|
||||
allowClear
|
||||
onChange={(value) =>
|
||||
updateConfig(record.id, {
|
||||
dateRange: value || undefined,
|
||||
})
|
||||
}
|
||||
/>
|
||||
),
|
||||
render: (_: any, record: RatioConfigItem) => {
|
||||
return (
|
||||
<DatePicker.RangePicker
|
||||
value={record.dateRange as any}
|
||||
onChange={(date) => {
|
||||
updateConfig(record.id, { dateRange: date });
|
||||
}}
|
||||
placeholder={["开始时间", "结束时间"]}
|
||||
allowClear
|
||||
/>
|
||||
);
|
||||
},
|
||||
},
|
||||
{
|
||||
title: "数量",
|
||||
|
||||
@@ -12,20 +12,25 @@ class LabelFilter(BaseModel):
|
||||
value: Optional[str] = Field(None, description="标签值")
|
||||
|
||||
class FilterCondition(BaseModel):
|
||||
date_range: Optional[str] = Field(None, description="数据范围", alias="dateRange")
|
||||
date_range: Optional[List[str]] = Field(None, description="数据范围", alias="dateRange")
|
||||
label: Optional[LabelFilter] = Field(None, description="标签")
|
||||
|
||||
@field_validator("date_range")
|
||||
@classmethod
|
||||
def validate_date_range(cls, v: Optional[str]) -> Optional[str]:
|
||||
# ensure it's a numeric string if provided
|
||||
if not v:
|
||||
return v
|
||||
def validate_date_range(cls, date_range: Optional[List[str]]) -> Optional[List[str]]:
|
||||
# ensure it's a date range if provided
|
||||
if not date_range or len(date_range) == 0:
|
||||
return date_range
|
||||
if len(date_range) != 2:
|
||||
raise ValueError("date_range must be a list of two date strings: [start, end]")
|
||||
try:
|
||||
int(v)
|
||||
return v
|
||||
start = datetime.fromisoformat(date_range[0])
|
||||
end = datetime.fromisoformat(date_range[1])
|
||||
if start > end:
|
||||
raise ValueError("date_range start must be earlier than or equal to end")
|
||||
return date_range
|
||||
except (ValueError, TypeError) as e:
|
||||
raise ValueError("date_range must be a numeric string")
|
||||
raise ValueError("date_range items must be ISO date strings (e.g. YYYY-MM-DD)")
|
||||
|
||||
class Config:
|
||||
# allow population by field name when constructing model programmatically
|
||||
|
||||
@@ -271,14 +271,13 @@ class RatioTaskService:
|
||||
logger.info(f"start filter file: {file}, conditions: {conditions}")
|
||||
|
||||
# Check data range condition if provided
|
||||
if conditions.date_range:
|
||||
if conditions.date_range and len(conditions.date_range) == 2:
|
||||
try:
|
||||
from datetime import datetime, timedelta
|
||||
data_range_days = int(conditions.date_range)
|
||||
if data_range_days > 0:
|
||||
cutoff_date = datetime.now() - timedelta(days=data_range_days)
|
||||
if file.tags_updated_at and file.tags_updated_at < cutoff_date:
|
||||
return False
|
||||
start_at = datetime.fromisoformat(conditions.date_range[0])
|
||||
end_at = datetime.fromisoformat(conditions.date_range[1])
|
||||
if file.tags_updated_at and (file.tags_updated_at < start_at or file.tags_updated_at > end_at):
|
||||
return False
|
||||
except (ValueError, TypeError) as e:
|
||||
logger.warning(f"Invalid data_range value: {conditions.date_range}", e)
|
||||
return False
|
||||
@@ -294,7 +293,7 @@ class RatioTaskService:
|
||||
for tag in all_tags:
|
||||
if conditions.label.label and tag.get("label") != conditions.label.label:
|
||||
continue
|
||||
if conditions.label.value is None:
|
||||
if conditions.label.value is None or len(conditions.label.value) == 0:
|
||||
return True
|
||||
if tag.get("value") == conditions.label.value:
|
||||
return True
|
||||
|
||||
Reference in New Issue
Block a user