You've already forked DataMate
init datamate
This commit is contained in:
10
runtime/ops/llms/qa_condition_evaluator/__init__.py
Normal file
10
runtime/ops/llms/qa_condition_evaluator/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
since:
|
||||
"""
|
||||
|
||||
from datamate.core.base_op import OPERATORS
|
||||
|
||||
# Register the operator name together with the dotted path of the module
# that implements it.
OPERATORS.register_module(
    module_name='QAConditionEvaluator',
    module_path="ops.llms.qa_condition_evaluator.process",
)
|
||||
16
runtime/ops/llms/qa_condition_evaluator/metadata.yml
Normal file
16
runtime/ops/llms/qa_condition_evaluator/metadata.yml
Normal file
@@ -0,0 +1,16 @@
|
||||
name: 'QA评估'
|
||||
name_en: 'QA Assessment'
|
||||
description: '通过用户维度和相应描述进行QA对评估。'
|
||||
description_en: 'Evaluate QA pairs against user-defined dimensions and their corresponding descriptions.'
|
||||
language: 'python'
|
||||
vendor: 'huawei'
|
||||
raw_id: 'QAConditionEvaluator'
|
||||
version: '1.0.0'
|
||||
types:
|
||||
- 'consolidate'
|
||||
modal: 'text'
|
||||
effect:
|
||||
before: ''
|
||||
after: ''
|
||||
inputs: 'text'
|
||||
outputs: 'text'
|
||||
98
runtime/ops/llms/qa_condition_evaluator/process.py
Normal file
98
runtime/ops/llms/qa_condition_evaluator/process.py
Normal file
@@ -0,0 +1,98 @@
|
||||
# -- encoding: utf-8 --
|
||||
|
||||
"""
|
||||
Description: 基于LLM通过用户设置维度和相应描述进行QA对评估
|
||||
Create: 2023/11/7 9:26
|
||||
"""
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
||||
from datamate.core.base_op import LLM
|
||||
|
||||
|
||||
class QAConditionEvaluator(LLM):
    """Evaluate QA pairs against a list of dimensions by prompting an LLM.

    Each dimension is a dict with a ``dimension`` (name) and a ``description``
    (the criterion text). One prompt is built per dimension from
    ``resources/template.txt`` plus optional few-shot examples taken from
    ``resources/examples.json``; every QA pair is then judged once per
    dimension and the verdict is parsed from the model's reply.

    NOTE(review): ``get_llm``, ``save_sample`` and ``text_key`` are not defined
    in this class; they are presumably provided by the ``LLM`` base class.
    """

    def __init__(self, *args, **kwargs):
        """Read the configuration from ``kwargs`` and pre-build all prompts.

        Recognised keyword arguments (all optional):
            taskId: identifier used in log messages, defaults to
                ``"default_id"``.
            dimension: list of ``{"dimension": ..., "description": ...}``
                dicts; three built-in dimensions are used when absent.
        """
        super().__init__(*args, **kwargs)
        # Accept both the ASCII colon and the full-width colon (U+FF1A, written
        # as an escape so it cannot be lost to text normalisation): replies
        # written in Chinese frequently use the full-width form.
        self.pattern = r'结果[:\uff1a] ?[YN]'
        self.template_path = Path(__file__).parent / "resources/template.txt"
        self.examples_path = Path(__file__).parent / "resources/examples.json"
        self.task_id = kwargs.get("taskId", "default_id")
        self.dimensions = kwargs.get("dimension", [
            {
                "dimension": "回答是否有针对性",
                "description": "回答应对问题中的所有疑问点提供正面、直接的回答,"
                               "不应引起疑惑。同时,答案不应有任何内容的遗漏,需构成一个完整的陈述。"
            },
            {
                "dimension": "问题是否独立",
                "description": "仅分析问题,问题的主体和客体都比较明确,即使有省略,也符合语言习惯。"
                               "在不需要补充其他信息的情况下不会引起疑惑。"
            },
            {
                "dimension": "语法是否错误",
                "description": "问题为疑问句,答案为陈述句; 不存在词语搭配不当的情况;连接词和标点符号不存在错用情况;"
                               "逻辑混乱的情况不存在;语法结构都正确且完整;"
            }
        ])

        self.llm = self.get_llm(*args, **kwargs)
        self.prompts = self.build_llm_prompt(*args, **kwargs)

    @staticmethod
    def _escape_braces(text: str) -> str:
        """Double every brace so ``text`` survives a later ``str.format`` call."""
        return text.replace("{", "{{").replace("}", "}}")

    @staticmethod
    def _process_examples(dimension_example: List) -> str:
        """Render the few-shot examples of one dimension as prompt text.

        Args:
            dimension_example: list of dicts with the keys ``question``,
                ``answer``, ``evaluate`` and ``result``; may be empty or None.

        Returns:
            A single newline when there are no examples, otherwise a header
            line followed by one formatted section per example.
        """
        if not dimension_example:
            return "\n"
        sections = ["\n以下是一些案例供你参考:"]
        sections.extend(
            f"\n问题:{example['question']}"
            f"\n回答:{example['answer']}"
            f"\n分析思路:{example['evaluate']}"
            f"\n结果:{example['result']}\n"
            for example in dimension_example
        )
        return "".join(sections)

    def execute(self, sample: Dict[str, Any]) -> Dict[str, Any]:
        """Judge every QA pair in ``sample`` on every configured dimension.

        ``sample[self.text_key]`` must hold a JSON array of objects carrying
        the keys ``question``, ``answer`` and ``qaId``.

        Args:
            sample: the record to process; it is modified in place.

        Returns:
            The same ``sample`` object. Its text field is overwritten with a
            status string and the per-QA results are handed to
            ``self.save_sample``.

        Raises:
            json.JSONDecodeError: if the text field is not valid JSON.
            KeyError: if a QA object lacks one of the required keys.
        """
        start = time.time()
        qas = json.loads(sample[self.text_key])
        single_content_res = []
        for qa in qas:
            single_qa_res = [
                {"dimension": dimension, "result": self._llm_call_parse(qa, prompt, retry=2)}
                for dimension, prompt in self.prompts.items()
            ]
            single_content_res.append({"qaId": qa["qaId"], "result": single_qa_res})

        # NOTE(review): "Sucess" is misspelled, but it is a runtime value that
        # downstream consumers may compare against, so it is left unchanged.
        sample[self.text_key] = "Sucess"
        self.save_sample(single_content_res, sample)
        cost_time = time.time() - start
        logger.info(f"task id: {self.task_id}, method: QAConditionEvaluator costs {cost_time:.6f} s")
        return sample

    def build_llm_prompt(self, *args, **kwargs) -> Dict:
        """Build one prompt per dimension from the template and example files.

        The template is formatted in two stages: the criterion and examples are
        filled in here, while ``{question}`` and ``{answer}`` are kept as
        placeholders to be filled for each QA pair later.

        Returns:
            A dict mapping each dimension name to its partially filled prompt.
        """
        templates = self.template_path.read_text(encoding="utf-8")
        examples_dict = json.loads(self.examples_path.read_text(encoding="utf-8"))
        prompts_dict = {}
        for dimension in self.dimensions:
            name, des = dimension["dimension"], dimension["description"]
            dimension_example = self._process_examples(examples_dict.get(name))
            # The resulting prompt goes through str.format a second time, so
            # literal braces in user-supplied text are escaped here; otherwise
            # that second call raises KeyError/ValueError.
            prompts_dict[name] = templates.format(
                criterion=self._escape_braces(des),
                examples=self._escape_braces(dimension_example),
                question="{question}",
                answer="{answer}",
            )
        return prompts_dict

    def _llm_call_parse(self, data: Dict, prompt: str, retry: int = 2):
        """Ask the LLM for a verdict on one QA pair and parse the reply.

        Args:
            data: a QA object with the keys ``question`` and ``answer``.
            prompt: a prompt with ``{question}`` / ``{answer}`` placeholders.
            retry: maximum number of attempts.

        Returns:
            True when a reply matching ``self.pattern`` contains ``Y``, False
            when it contains ``N`` or when no attempt produced a parsable
            reply.
        """
        filled_prompt = prompt.format(question=data["question"], answer=data["answer"])
        for _ in range(retry):
            # The try lives inside the loop so that one failed call consumes a
            # single attempt instead of cancelling all remaining retries.
            try:
                response = self.llm(filled_prompt)
            except RuntimeError as e:
                logger.error(f"method: QAConditionEvaluator execution error, cause by {e}")
                continue
            verdict = re.search(self.pattern, response)
            if verdict:
                return "Y" in verdict.group(0)
        return False
|
||||
@@ -0,0 +1,36 @@
|
||||
{
|
||||
"回答是否有针对性": [
|
||||
{
|
||||
"question": "下发 IO 测试的命令格式是什么?",
|
||||
"answer": "下发 IO 测试的命令格式为:",
|
||||
"evaluate": "回答正面直接的回答了问题(命令格式是什么),但是答案以冒号结尾并不是完整的陈述,因此结果是N",
|
||||
"result": "N"
|
||||
},
|
||||
{
|
||||
"question": "HyperCDP的约束关系是什么?",
|
||||
"answer": "HyperCDP特性和OceanStor Dorado的其他功能之间存在一定的约束关系。",
|
||||
"evaluate": "回答提及到了HyperCDP特性和OceanStor Dorado的其他功能之间存在约束关系,并未直接回答约束关系是什么,因此结果是N。",
|
||||
"result": "N"
|
||||
}
|
||||
],
|
||||
"问题是否独立": [
|
||||
{
|
||||
"question": "这段文档主要讲述了什么内容?",
|
||||
"answer": "这段文档主要讲述了在治疗月经失调和怀孕问题时,需要根据患者的具体症状来调整治疗方法。主要方法包括温养肾气、调理气血、调补冲任、疏肝解郁、燥湿化痰、理气调经以及活血调经等。",
|
||||
"evaluate": "问题中省略了“这段文档”的具体指代,如果没有额外的上下文信息,单独这个问题可能会引起疑惑,因为听者或读者可能不知道指的是哪篇文章。在大多数情况下,为了使问题更加明确,提问者应该提供文章的标题、作者或相关背景信息。",
|
||||
"result": "N"
|
||||
},
|
||||
{
|
||||
"question": "这些鱼体内的不饱和脂肪酸含量非常高,这是为什么?",
|
||||
"answer": "因为这些鱼体内的脂肪酸构成比较特殊,不饱和脂肪酸的含量非常高,而饱和脂肪酸的含量则较低。",
|
||||
"evaluate": "在中文提问的语言习惯中,问题中使用“这些”来指代一组已经提及或已知的鱼是合适的。然而,问题本身并没有提供足够的信息来确定“这些鱼”具体指的是哪些鱼,这就造成了主体的不明确。",
|
||||
"result": "N"
|
||||
},
|
||||
{
|
||||
"question": "如何食用黑芝麻以达到最佳效果?",
|
||||
"answer": "我们平日里吃到的芝麻制品多为芝麻酱和芝麻香油。吃整粒的芝麻对于营养的吸收来说并不是最好的,因为芝麻的外面有一层稍硬的膜,只有把它碾碎,其中的营养素才能被吸收。所以,芝麻最好碾碎了再吃。",
|
||||
"evaluate": "在这个问题中,主体“黑芝麻”是具体的食物,客体“食用方法以达到最佳效果”也是明确的,因为人们通常关心如何食用某种食物以获得最大的健康益处。因此这个问题是符合标准的。",
|
||||
"result": "Y"
|
||||
}
|
||||
]
|
||||
}
|
||||
107
runtime/ops/llms/qa_condition_evaluator/resources/readme.md
Normal file
107
runtime/ops/llms/qa_condition_evaluator/resources/readme.md
Normal file
@@ -0,0 +1,107 @@
|
||||
# QA评估插件
|
||||
## 背景
|
||||
基于维度和描述对QA对进行评估,支持用户自定义维度。
|
||||
### 约束:
|
||||
- 维度小于10个
|
||||
- 维度名称低于20个字
|
||||
- 依赖大模型服务,服务输入输出如下:
|
||||
```python
|
||||
# 输入
|
||||
request_template = {
|
||||
"prompt": "你好",
|
||||
"max_length": 2024,
|
||||
"top_n": 0.9,
|
||||
"temperature": 0.9
|
||||
}
|
||||
# 输出
|
||||
response_template = {
|
||||
"response":"XXX"
|
||||
}
|
||||
```
|
||||
#### 默认3个维度:
|
||||
- 问题是否独立
|
||||
- 回答是否有针对性
|
||||
- 语法是否错误
|
||||
|
||||
## 调用接口输入
|
||||
```python
|
||||
inputs = [[
|
||||
{
|
||||
"businessData": {
|
||||
"params": {
|
||||
"taskId":1,
|
||||
"LLMUrl":"https://x.x.x.x:xxxx/qwen",
|
||||
"LLMHeaders":{"Content-Type": "application/json","User-Agent":"Client"},
|
||||
"LLMBody":{
|
||||
"prompt": "你好",
|
||||
"max_length": 2024,
|
||||
"top_n": 0.9,
|
||||
"temperature": 0.9
|
||||
},
|
||||
"dimension":[
|
||||
{"dimension":"回答是否有针对性",
|
||||
"description":"回答应对问题中的所有疑问点提供正面、直接的回答,不应引起疑惑。同时,答案不应有任何内容的遗漏,需构成一个完整的陈述。"
|
||||
},
|
||||
{"dimension":"问题是否独立",
|
||||
"description":"仅分析问题,问题的主体和客体都比较明确,即使有省略,也符合语言习惯。在不需要补充其他信息的情况下不会引起疑惑。"
|
||||
},
|
||||
{"dimension":"语法是否错误",
|
||||
"description":"问题为疑问句,答案为陈述句; 不存在词语搭配不当的情况;连接词和标点符号不存在错用情况;逻辑混乱的情况不存在;语法结构都正确且完整;"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"passData": {
|
||||
"data": "",
|
||||
"text": "[{\"question\":\"什么是秋燥、秋困和秋冻?\",\"answer\":\"秋燥、秋困和秋冻是秋天常见的三种症状和养生问题。秋燥是指秋天天气干燥,导致人体水分流失,出现皮肤发痒、嘴唇起皮、鼻咽干燥等症状;秋困是指秋天天气凉爽,人体代谢下降,导致人感到无精打采、呵欠连天、昏昏欲睡等症状;秋冻是指秋天气温下降,人体需要适应气温的变化,不能一下子穿上很多衣服,让身体适应气温的变化。\",\"qaId\":1}]",
|
||||
"meta": {
|
||||
}
|
||||
},
|
||||
"contextData": {}
|
||||
}
|
||||
]]
|
||||
|
||||
```
|
||||
## 调用接口输出
|
||||
```python
|
||||
outputs = [
|
||||
{
|
||||
"businessData": {
|
||||
"params": {
|
||||
"taskId": 1,
|
||||
"LLMUrl": "https://x.x.x.x:xxxx/qwen",
|
||||
"LLMHeaders": {
|
||||
"Content-Type": "application/json",
|
||||
"User-Agent": "Client"
|
||||
},
|
||||
"LLMBody": {
|
||||
"prompt": "你好",
|
||||
"max_length": 2024,
|
||||
"top_n": 0.9,
|
||||
"temperature": 0.9
|
||||
},
|
||||
"dimension": [
|
||||
{
|
||||
"dimension": "回答是否有针对性",
|
||||
"description": "回答应对问题中的所有疑问点提供正面、直接的回答,不应引起疑惑。同时,答案不应有任何内容的遗漏,需构成一个完整的陈述。"
|
||||
},
|
||||
{
|
||||
"dimension": "问题是否独立",
|
||||
"description": "仅分析问题,问题的主体和客体都比较明确,即使有省略,也符合语言习惯。在不需要补充其他信息的情况下不会引起疑惑。"
|
||||
},
|
||||
{
|
||||
"dimension": "语法是否错误",
|
||||
"description": "问题为疑问句,答案为陈述句; 不存在词语搭配不当的情况;连接词和标点符号不存在错用情况;逻辑混乱的情况不存在;语法结构都正确且完整;"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"passData": {
|
||||
"data": "",
|
||||
"text": "[{\"qaId\": 1, \"result\": [{\"dimension\": \"回答是否有针对性\", \"result\": true}, {\"dimension\": \"问题是否独立\", \"result\": true}, {\"dimension\": \"语法是否错误\", \"result\": true}]}]",
|
||||
"meta": {}
|
||||
},
|
||||
"contextData": {}
|
||||
}
|
||||
]
|
||||
```
|
||||
@@ -0,0 +1,17 @@
|
||||
你将会获得一个问答对,判断问答对是否满足以下标准:
|
||||
标准:"{criterion}"
|
||||
|
||||
要求:
|
||||
1. 结合以上标准,一步一步的分析问答对是否满足标准,按照模板输出你的回答。
|
||||
2. 如果你对自己的判断没有较强的信心,直接算作不满足标准。
|
||||
3. 你的最终裁定应该是'Y'表示是(符合标准)或'N'表示否(不符合标准)。
|
||||
4. 如果你的回答不符合模板格式和规范,重新思考回答。
|
||||
{examples}
|
||||
问答对:
|
||||
问题:"{question}"
|
||||
答案:"{answer}"
|
||||
|
||||
模板:
|
||||
结果:[插入结果N或Y]
|
||||
分析思路:XXX
|
||||
"""
|
||||
Reference in New Issue
Block a user