fix the ratio task config (#224)

* fix: fix the dataset card icon

* fix: fix the dataset file tag distribution and ratio task

* refactor: change dateRange config from latest to start-end
This commit is contained in:
hefanli
2026-01-05 17:02:28 +08:00
committed by GitHub
parent 3f1ad6a872
commit a15a6134ff
6 changed files with 44 additions and 37 deletions

View File

@@ -12,20 +12,25 @@ class LabelFilter(BaseModel):
value: Optional[str] = Field(None, description="标签值")
class FilterCondition(BaseModel):
date_range: Optional[str] = Field(None, description="数据范围", alias="dateRange")
date_range: Optional[List[str]] = Field(None, description="数据范围", alias="dateRange")
label: Optional[LabelFilter] = Field(None, description="标签")
@field_validator("date_range")
@classmethod
def validate_date_range(cls, date_range: Optional[List[str]]) -> Optional[List[str]]:
    """Validate the optional ``[start, end]`` date range filter.

    Args:
        date_range: ``None``, an empty list, or a list of exactly two
            ISO-format date/datetime strings ordered as ``[start, end]``.

    Returns:
        The input unchanged when it is ``None``/empty or passes validation.

    Raises:
        ValueError: if the list does not contain exactly two items, if an
            item cannot be parsed (or the two items cannot be compared) as
            ISO dates, or if ``start`` is later than ``end``.
    """
    # Imported locally so the validator is self-contained even if the
    # module-level imports (not visible from this block) lack ``datetime``.
    from datetime import datetime

    # A missing or empty range means "no date filtering"; pass it through.
    if not date_range:
        return date_range
    if len(date_range) != 2:
        raise ValueError("date_range must be a list of two date strings: [start, end]")

    try:
        start = datetime.fromisoformat(date_range[0])
        end = datetime.fromisoformat(date_range[1])
        # Compared inside the try: mixing naive and timezone-aware values
        # raises TypeError, which must surface as a ValueError.
        is_reversed = start > end
    except (ValueError, TypeError) as e:
        raise ValueError("date_range items must be ISO date strings (e.g. YYYY-MM-DD)") from e

    # Raised outside the try so this specific message is not swallowed and
    # replaced by the generic "ISO date strings" error above.
    if is_reversed:
        raise ValueError("date_range start must be earlier than or equal to end")
    return date_range
class Config:
# allow population by field name when constructing model programmatically

View File

@@ -271,14 +271,13 @@ class RatioTaskService:
logger.info(f"start filter file: {file}, conditions: {conditions}")
# Check data range condition if provided
if conditions.date_range:
if conditions.date_range and len(conditions.date_range) == 2:
try:
from datetime import datetime, timedelta
data_range_days = int(conditions.date_range)
if data_range_days > 0:
cutoff_date = datetime.now() - timedelta(days=data_range_days)
if file.tags_updated_at and file.tags_updated_at < cutoff_date:
return False
start_at = datetime.fromisoformat(conditions.date_range[0])
end_at = datetime.fromisoformat(conditions.date_range[1])
if file.tags_updated_at and (file.tags_updated_at < start_at or file.tags_updated_at > end_at):
return False
except (ValueError, TypeError) as e:
logger.warning(f"Invalid data_range value: {conditions.date_range}", e)
return False
@@ -294,7 +293,7 @@ class RatioTaskService:
for tag in all_tags:
if conditions.label.label and tag.get("label") != conditions.label.label:
continue
if conditions.label.value is None:
if conditions.label.value is None or len(conditions.label.value) == 0:
return True
if tag.get("value") == conditions.label.value:
return True