You've already forked DataMate
feature: add data-evaluation
* feature: add evaluation task management function * feature: add evaluation task detail page * fix: delete duplicate definition for table t_model_config * refactor: rename package synthesis to ratio * refactor: add eval file table and refactor related code * fix: calling large models in parallel during evaluation
This commit is contained in:
@@ -0,0 +1,85 @@
|
||||
import json
|
||||
|
||||
from enum import Enum
|
||||
from jsonschema import validate
|
||||
|
||||
class ItemTypes(Enum):
|
||||
QA = "QA"
|
||||
|
||||
|
||||
class StructuredFileItemHandler:
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def get_item_type(self) -> ItemTypes:
|
||||
pass
|
||||
|
||||
def get_items_from_file(self, file_path: str) -> list[dict]:
|
||||
pass
|
||||
|
||||
def check_file(self) -> bool:
|
||||
pass
|
||||
|
||||
class QAItemHandler(StructuredFileItemHandler):
|
||||
def __init__(self):
|
||||
self.schema_alpaca = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"instruction": {"type": "string"},
|
||||
"input": {"type": "string"},
|
||||
"output": {"type": "string"}
|
||||
},
|
||||
"required": ["instruction", "output"],
|
||||
}
|
||||
self.schema_alpaca_list = {
|
||||
"type": "array",
|
||||
"items": self.schema_alpaca,
|
||||
}
|
||||
super().__init__()
|
||||
|
||||
def get_item_type(self):
|
||||
return ItemTypes.QA
|
||||
|
||||
def validate_json(self, data):
|
||||
try:
|
||||
validate(instance=data, schema=self.schema_alpaca)
|
||||
return True
|
||||
except Exception as e:
|
||||
try:
|
||||
validate(instance=data, schema=self.schema_alpaca_list)
|
||||
return True
|
||||
except Exception as e:
|
||||
return False
|
||||
|
||||
def get_items_from_file(self, file_path: str) -> list[dict]:
|
||||
file_type = file_path.split(".")[-1].upper()
|
||||
items = []
|
||||
if file_type == "JSON":
|
||||
with open(file_path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
if not self.validate_json(data):
|
||||
return items
|
||||
items = data
|
||||
elif file_type == "JSONL":
|
||||
with open(file_path, "r", encoding="utf-8") as f:
|
||||
for line in f:
|
||||
data = json.loads(line)
|
||||
if not self.validate_json(data):
|
||||
continue
|
||||
items.append(data)
|
||||
return items
|
||||
|
||||
def check_file(self) -> bool:
|
||||
pass
|
||||
|
||||
|
||||
class StructuredFileHandlerFactory:
|
||||
def __init__(self):
|
||||
self.handlers: list[StructuredFileItemHandler] = []
|
||||
self.handlers.append(QAItemHandler())
|
||||
|
||||
def get_handler(self, item_type: str) -> StructuredFileItemHandler:
|
||||
for handler in self.handlers:
|
||||
if handler.get_item_type().value == item_type:
|
||||
return handler
|
||||
raise ValueError(f"Unsupported item type: {item_type}")
|
||||
Reference in New Issue
Block a user