feature: add data-evaluation

* feature: add evaluation task management function

* feature: add evaluation task detail page

* fix: delete duplicate definition for table t_model_config

* refactor: rename package synthesis to ratio

* refactor: add eval file table and refactor related code

* fix: calling large models in parallel during evaluation
This commit is contained in:
hefanli
2025-12-04 09:23:54 +08:00
committed by GitHub
parent 265e284fb8
commit 1d19cd3a62
52 changed files with 2882 additions and 1244 deletions

View File

@@ -0,0 +1,85 @@
import json
from enum import Enum

from jsonschema import ValidationError, validate
class ItemTypes(Enum):
    """Kinds of structured items that a file item handler can serve."""

    # Each member's value is the plain string callers use to select a handler.
    QA = "QA"
class StructuredFileItemHandler:
    """Base interface for handlers that read items out of structured files.

    Every method on this base class is a stub that does nothing and returns
    ``None``; subclasses override them with real behavior.
    """

    def __init__(self):
        """Create the handler; the base class keeps no state."""

    def get_item_type(self) -> ItemTypes:
        """Report the item type this handler serves (stub: returns None)."""
        return None

    def get_items_from_file(self, file_path: str) -> list[dict]:
        """Read the items stored at *file_path* (stub: returns None)."""
        return None

    def check_file(self) -> bool:
        """Check a file (stub: returns None)."""
        return None
class QAItemHandler(StructuredFileItemHandler):
    """Load QA items stored as Alpaca-style records in JSON or JSONL files.

    A record is an object with string ``instruction`` and ``output`` fields
    and an optional string ``input`` field.
    """

    def __init__(self):
        # Schema for a single record; "input" is optional.
        self.schema_alpaca = {
            "type": "object",
            "properties": {
                "instruction": {"type": "string"},
                "input": {"type": "string"},
                "output": {"type": "string"},
            },
            "required": ["instruction", "output"],
        }
        # Schema for a JSON array in which every element is a record.
        self.schema_alpaca_list = {
            "type": "array",
            "items": self.schema_alpaca,
        }
        super().__init__()

    def get_item_type(self) -> ItemTypes:
        """Return ``ItemTypes.QA``."""
        return ItemTypes.QA

    def validate_json(self, data) -> bool:
        """Return True if *data* is a single record or a list of records."""
        for schema in (self.schema_alpaca, self.schema_alpaca_list):
            try:
                validate(instance=data, schema=schema)
            except ValidationError:
                # Not this shape; try the next accepted schema.
                continue
            return True
        return False

    def _as_items(self, data) -> list[dict]:
        """Normalize parsed JSON to a list of records ([] when invalid)."""
        if not self.validate_json(data):
            return []
        # A valid payload is either one record (dict) or a list of records;
        # wrap the single record so callers always receive a list.
        return [data] if isinstance(data, dict) else data

    def get_items_from_file(self, file_path: str) -> list[dict]:
        """Read all valid records from a ``.json`` or ``.jsonl`` file.

        The file type is taken from the text after the last ``.`` in
        *file_path*, compared case-insensitively. A JSON file may hold a
        single record or a list of records. In a JSONL file, blank lines and
        lines that fail schema validation are skipped. Any other extension
        yields an empty list.

        Args:
            file_path: Path of the file to read (opened as UTF-8).

        Returns:
            The records found, always as a flat list of dicts.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file (or a non-blank JSONL line)
                is not valid JSON.
        """
        file_type = file_path.split(".")[-1].upper()
        items: list[dict] = []
        if file_type == "JSON":
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            items = self._as_items(data)
        elif file_type == "JSONL":
            with open(file_path, "r", encoding="utf-8") as f:
                for line in f:
                    # Blank lines carry no record and would make json.loads
                    # raise, aborting the whole load.
                    if not line.strip():
                        continue
                    items.extend(self._as_items(json.loads(line)))
        return items

    def check_file(self) -> bool:
        """Placeholder: no check is implemented yet; returns None."""
class StructuredFileHandlerFactory:
    """Registry that hands out the item handler for a given item type."""

    def __init__(self):
        """Register the built-in handlers."""
        self.handlers: list[StructuredFileItemHandler] = [QAItemHandler()]

    def get_handler(self, item_type: str) -> StructuredFileItemHandler:
        """Return the first registered handler whose type value is *item_type*.

        Raises:
            ValueError: If no registered handler serves *item_type*.
        """
        matched = next(
            (
                candidate
                for candidate in self.handlers
                if candidate.get_item_type().value == item_type
            ),
            None,
        )
        if matched is None:
            raise ValueError(f"Unsupported item type: {item_type}")
        return matched