You've already forked DataMate
fix the ratio task config (#224)
* fix: fix the dataset card icon
* fix: fix the dataset file tag distribution and ratio task
* refactor: change dateRange config from latest to start-end
This commit is contained in:
@@ -78,7 +78,7 @@ public interface DatasetConverter {
|
|||||||
for (DatasetFile datasetFile : datasetFiles) {
|
for (DatasetFile datasetFile : datasetFiles) {
|
||||||
List<FileTag> tags = datasetFile.analyzeTag();
|
List<FileTag> tags = datasetFile.analyzeTag();
|
||||||
if (CollectionUtils.isEmpty(tags)) {
|
if (CollectionUtils.isEmpty(tags)) {
|
||||||
return distribution;
|
continue;
|
||||||
}
|
}
|
||||||
for (FileTag tag : tags) {
|
for (FileTag tag : tags) {
|
||||||
Map<String, Long> tagValueMap = distribution.getOrDefault(tag.getFromName(), new HashMap<>());
|
Map<String, Long> tagValueMap = distribution.getOrDefault(tag.getFromName(), new HashMap<>());
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ import {
|
|||||||
FileImage,
|
FileImage,
|
||||||
FileText,
|
FileText,
|
||||||
Video,
|
Video,
|
||||||
|
Film,
|
||||||
FileCode,
|
FileCode,
|
||||||
MessageCircleMore,
|
MessageCircleMore,
|
||||||
ImagePlus,
|
ImagePlus,
|
||||||
@@ -45,7 +46,7 @@ export const datasetTypeMap: Record<
|
|||||||
label: "文本",
|
label: "文本",
|
||||||
order: 1,
|
order: 1,
|
||||||
icon: ScanText,
|
icon: ScanText,
|
||||||
iconColor: "blue",
|
iconColor: "#A78BFA",
|
||||||
children: [
|
children: [
|
||||||
DatasetSubType.TEXT_DOCUMENT,
|
DatasetSubType.TEXT_DOCUMENT,
|
||||||
DatasetSubType.TEXT_WEB,
|
DatasetSubType.TEXT_WEB,
|
||||||
@@ -58,7 +59,7 @@ export const datasetTypeMap: Record<
|
|||||||
label: "图像",
|
label: "图像",
|
||||||
order: 2,
|
order: 2,
|
||||||
icon: Image,
|
icon: Image,
|
||||||
iconColor: "green",
|
iconColor: "#38BDF8",
|
||||||
children: [DatasetSubType.IMAGE_IMAGE, DatasetSubType.IMAGE_CAPTION],
|
children: [DatasetSubType.IMAGE_IMAGE, DatasetSubType.IMAGE_CAPTION],
|
||||||
description: "用于处理和分析图像数据的数据集",
|
description: "用于处理和分析图像数据的数据集",
|
||||||
},
|
},
|
||||||
@@ -67,7 +68,7 @@ export const datasetTypeMap: Record<
|
|||||||
label: "音频",
|
label: "音频",
|
||||||
order: 3,
|
order: 3,
|
||||||
icon: Music,
|
icon: Music,
|
||||||
iconColor: "orange",
|
iconColor: "#F59E0B",
|
||||||
children: [DatasetSubType.AUDIO_AUDIO, DatasetSubType.AUDIO_JSONL],
|
children: [DatasetSubType.AUDIO_AUDIO, DatasetSubType.AUDIO_JSONL],
|
||||||
description: "用于处理和分析音频数据的数据集",
|
description: "用于处理和分析音频数据的数据集",
|
||||||
},
|
},
|
||||||
@@ -75,8 +76,8 @@ export const datasetTypeMap: Record<
|
|||||||
value: DatasetType.VIDEO,
|
value: DatasetType.VIDEO,
|
||||||
label: "视频",
|
label: "视频",
|
||||||
order: 3,
|
order: 3,
|
||||||
icon: Video,
|
icon: Film,
|
||||||
iconColor: "purple",
|
iconColor: "#22D3EE",
|
||||||
children: [DatasetSubType.VIDEO_VIDEO, DatasetSubType.VIDEO_JSONL],
|
children: [DatasetSubType.VIDEO_VIDEO, DatasetSubType.VIDEO_JSONL],
|
||||||
description: "用于处理和分析视频数据的数据集",
|
description: "用于处理和分析视频数据的数据集",
|
||||||
},
|
},
|
||||||
@@ -206,6 +207,7 @@ export function mapDataset(dataset: AnyObject): Dataset {
|
|||||||
createdAt: formatDateTime(dataset.createdAt) || "--",
|
createdAt: formatDateTime(dataset.createdAt) || "--",
|
||||||
updatedAt: formatDateTime(dataset?.updatedAt) || "--",
|
updatedAt: formatDateTime(dataset?.updatedAt) || "--",
|
||||||
icon: IconComponent ? <IconComponent className="w-full h-full" /> : <Database />,
|
icon: IconComponent ? <IconComponent className="w-full h-full" /> : <Database />,
|
||||||
|
iconColor: iconColor,
|
||||||
status: datasetStatusMap[dataset.status],
|
status: datasetStatusMap[dataset.status],
|
||||||
statistics: [
|
statistics: [
|
||||||
{ label: "文件数", value: dataset.fileCount || 0 },
|
{ label: "文件数", value: dataset.fileCount || 0 },
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import { useNavigate } from "react-router";
|
|||||||
import SelectDataset from "@/pages/RatioTask/Create/components/SelectDataset.tsx";
|
import SelectDataset from "@/pages/RatioTask/Create/components/SelectDataset.tsx";
|
||||||
import BasicInformation from "@/pages/RatioTask/Create/components/BasicInformation.tsx";
|
import BasicInformation from "@/pages/RatioTask/Create/components/BasicInformation.tsx";
|
||||||
import RatioConfig from "@/pages/RatioTask/Create/components/RatioConfig.tsx";
|
import RatioConfig from "@/pages/RatioTask/Create/components/RatioConfig.tsx";
|
||||||
|
import {formatDate} from "@/utils/unit.ts";
|
||||||
|
|
||||||
export default function CreateRatioTask() {
|
export default function CreateRatioTask() {
|
||||||
const navigate = useNavigate();
|
const navigate = useNavigate();
|
||||||
@@ -37,10 +38,11 @@ export default function CreateRatioTask() {
|
|||||||
}
|
}
|
||||||
const totals = String(values.totalTargetCount);
|
const totals = String(values.totalTargetCount);
|
||||||
const config = ratioTaskForm.ratioConfigs.map((c) => {
|
const config = ratioTaskForm.ratioConfigs.map((c) => {
|
||||||
|
const dateRange = c.dateRange ? [formatDate(c.dateRange[0]), formatDate(c.dateRange[1])] : []
|
||||||
return {
|
return {
|
||||||
datasetId: c.source,
|
datasetId: c.source,
|
||||||
counts: String(c.quantity ?? 0),
|
counts: String(c.quantity ?? 0),
|
||||||
filterConditions: { label: c.labelFilter, dateRange: String(c.dateRange ?? 0)},
|
filterConditions: { label: c.labelFilter, dateRange: dateRange},
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import {
|
|||||||
Select,
|
Select,
|
||||||
Table,
|
Table,
|
||||||
InputNumber,
|
InputNumber,
|
||||||
|
DatePicker
|
||||||
} from "antd";
|
} from "antd";
|
||||||
import { BarChart3 } from "lucide-react";
|
import { BarChart3 } from "lucide-react";
|
||||||
import type { Dataset } from "@/pages/DataManagement/dataset.model.ts";
|
import type { Dataset } from "@/pages/DataManagement/dataset.model.ts";
|
||||||
@@ -31,7 +32,7 @@ interface RatioConfigItem {
|
|||||||
percentage: number;
|
percentage: number;
|
||||||
source: string; // dataset id
|
source: string; // dataset id
|
||||||
labelFilter?: LabelFilter;
|
labelFilter?: LabelFilter;
|
||||||
dateRange?: number;
|
dateRange?: [Date | null, Date | null] | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RatioConfigProps {
|
interface RatioConfigProps {
|
||||||
@@ -303,20 +304,18 @@ const RatioConfig: FC<RatioConfigProps> = ({
|
|||||||
title: "标签更新时间",
|
title: "标签更新时间",
|
||||||
dataIndex: "dateRange",
|
dataIndex: "dateRange",
|
||||||
key: "dateRange",
|
key: "dateRange",
|
||||||
render: (_: any, record: RatioConfigItem) => (
|
render: (_: any, record: RatioConfigItem) => {
|
||||||
<Select
|
return (
|
||||||
style={{ width: "140px" }}
|
<DatePicker.RangePicker
|
||||||
placeholder="选择标签更新时间"
|
value={record.dateRange as any}
|
||||||
value={record.dateRange}
|
onChange={(date) => {
|
||||||
options={TIME_RANGE_OPTIONS}
|
updateConfig(record.id, { dateRange: date });
|
||||||
|
}}
|
||||||
|
placeholder={["开始时间", "结束时间"]}
|
||||||
allowClear
|
allowClear
|
||||||
onChange={(value) =>
|
|
||||||
updateConfig(record.id, {
|
|
||||||
dateRange: value || undefined,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
/>
|
/>
|
||||||
),
|
);
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
title: "数量",
|
title: "数量",
|
||||||
|
|||||||
@@ -12,20 +12,25 @@ class LabelFilter(BaseModel):
|
|||||||
value: Optional[str] = Field(None, description="标签值")
|
value: Optional[str] = Field(None, description="标签值")
|
||||||
|
|
||||||
class FilterCondition(BaseModel):
|
class FilterCondition(BaseModel):
|
||||||
date_range: Optional[str] = Field(None, description="数据范围", alias="dateRange")
|
date_range: Optional[List[str]] = Field(None, description="数据范围", alias="dateRange")
|
||||||
label: Optional[LabelFilter] = Field(None, description="标签")
|
label: Optional[LabelFilter] = Field(None, description="标签")
|
||||||
|
|
||||||
@field_validator("date_range")
|
@field_validator("date_range")
|
||||||
@classmethod
|
@classmethod
|
||||||
def validate_date_range(cls, v: Optional[str]) -> Optional[str]:
|
def validate_date_range(cls, date_range: Optional[List[str]]) -> Optional[List[str]]:
|
||||||
# ensure it's a numeric string if provided
|
# ensure it's a date range if provided
|
||||||
if not v:
|
if not date_range or len(date_range) == 0:
|
||||||
return v
|
return date_range
|
||||||
|
if len(date_range) != 2:
|
||||||
|
raise ValueError("date_range must be a list of two date strings: [start, end]")
|
||||||
try:
|
try:
|
||||||
int(v)
|
start = datetime.fromisoformat(date_range[0])
|
||||||
return v
|
end = datetime.fromisoformat(date_range[1])
|
||||||
|
if start > end:
|
||||||
|
raise ValueError("date_range start must be earlier than or equal to end")
|
||||||
|
return date_range
|
||||||
except (ValueError, TypeError) as e:
|
except (ValueError, TypeError) as e:
|
||||||
raise ValueError("date_range must be a numeric string")
|
raise ValueError("date_range items must be ISO date strings (e.g. YYYY-MM-DD)")
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
# allow population by field name when constructing model programmatically
|
# allow population by field name when constructing model programmatically
|
||||||
|
|||||||
@@ -271,13 +271,12 @@ class RatioTaskService:
|
|||||||
logger.info(f"start filter file: {file}, conditions: {conditions}")
|
logger.info(f"start filter file: {file}, conditions: {conditions}")
|
||||||
|
|
||||||
# Check data range condition if provided
|
# Check data range condition if provided
|
||||||
if conditions.date_range:
|
if conditions.date_range and len(conditions.date_range) == 2:
|
||||||
try:
|
try:
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
data_range_days = int(conditions.date_range)
|
start_at = datetime.fromisoformat(conditions.date_range[0])
|
||||||
if data_range_days > 0:
|
end_at = datetime.fromisoformat(conditions.date_range[1])
|
||||||
cutoff_date = datetime.now() - timedelta(days=data_range_days)
|
if file.tags_updated_at and (file.tags_updated_at < start_at or file.tags_updated_at > end_at):
|
||||||
if file.tags_updated_at and file.tags_updated_at < cutoff_date:
|
|
||||||
return False
|
return False
|
||||||
except (ValueError, TypeError) as e:
|
except (ValueError, TypeError) as e:
|
||||||
logger.warning(f"Invalid data_range value: {conditions.date_range}", e)
|
logger.warning(f"Invalid data_range value: {conditions.date_range}", e)
|
||||||
@@ -294,7 +293,7 @@ class RatioTaskService:
|
|||||||
for tag in all_tags:
|
for tag in all_tags:
|
||||||
if conditions.label.label and tag.get("label") != conditions.label.label:
|
if conditions.label.label and tag.get("label") != conditions.label.label:
|
||||||
continue
|
continue
|
||||||
if conditions.label.value is None:
|
if conditions.label.value is None or len(conditions.label.value) == 0:
|
||||||
return True
|
return True
|
||||||
if tag.get("value") == conditions.label.value:
|
if tag.get("value") == conditions.label.value:
|
||||||
return True
|
return True
|
||||||
|
|||||||
Reference in New Issue
Block a user