"""三元组文本化 + 上下文构建的单元测试。""" from app.module.kg_graphrag.context_builder import ( RELATION_TEMPLATES, build_context, textualize_subgraph, ) from app.module.kg_graphrag.models import ( EntitySummary, RelationSummary, VectorChunk, ) # --------------------------------------------------------------------------- # textualize_subgraph 测试 # --------------------------------------------------------------------------- class TestTextualizeSubgraph: """textualize_subgraph 函数的测试。""" def test_single_relation(self): entities = [ EntitySummary(id="1", name="用户行为数据", type="Dataset"), EntitySummary(id="2", name="user_id", type="Field"), ] relations = [ RelationSummary( source_name="用户行为数据", source_type="Dataset", target_name="user_id", target_type="Field", relation_type="HAS_FIELD", ), ] result = textualize_subgraph(entities, relations) assert "Dataset'用户行为数据'包含字段Field'user_id'" in result def test_multiple_relations(self): entities = [ EntitySummary(id="1", name="用户行为数据", type="Dataset"), EntitySummary(id="2", name="清洗管道", type="Workflow"), ] relations = [ RelationSummary( source_name="清洗管道", source_type="Workflow", target_name="用户行为数据", target_type="Dataset", relation_type="USES_DATASET", ), RelationSummary( source_name="用户行为数据", source_type="Dataset", target_name="外部系统", target_type="DataSource", relation_type="SOURCED_FROM", ), ] result = textualize_subgraph(entities, relations) assert "Workflow'清洗管道'使用了数据集Dataset'用户行为数据'" in result assert "Dataset'用户行为数据'的知识来源于DataSource'外部系统'" in result def test_all_relation_templates(self): """验证所有 10 种关系模板都能正确生成。""" for rel_type, template in RELATION_TEMPLATES.items(): relations = [ RelationSummary( source_name="A", source_type="TypeA", target_name="B", target_type="TypeB", relation_type=rel_type, ), ] result = textualize_subgraph([], relations) assert "TypeA'A'" in result assert "TypeB'B'" in result assert result # 非空 def test_unknown_relation_type(self): """未知关系类型使用通用模板。""" relations = [ RelationSummary( source_name="X", source_type="T1", target_name="Y", target_type="T2", relation_type="CUSTOM_REL", ), ] result = textualize_subgraph([], relations) assert "T1'X'与T2'Y'存在CUSTOM_REL关系" in result def test_orphan_entity_with_description(self): """无关系的独立实体(有描述)。""" entities = [ EntitySummary(id="1", name="孤立实体", type="Dataset", description="这是一个测试实体"), ] result = textualize_subgraph(entities, []) assert "Dataset'孤立实体': 这是一个测试实体" in result def test_orphan_entity_without_description(self): """无关系的独立实体(无描述)。""" entities = [ EntitySummary(id="1", name="孤立实体", type="Dataset"), ] result = textualize_subgraph(entities, []) assert "存在Dataset'孤立实体'" in result def test_empty_inputs(self): result = textualize_subgraph([], []) assert result == "" def test_entity_with_relation_not_orphan(self): """有关系的实体不应出现在独立实体部分。""" entities = [ EntitySummary(id="1", name="A", type="Dataset"), EntitySummary(id="2", name="B", type="Field"), EntitySummary(id="3", name="C", type="Workflow"), ] relations = [ RelationSummary( source_name="A", source_type="Dataset", target_name="B", target_type="Field", relation_type="HAS_FIELD", ), ] result = textualize_subgraph(entities, relations) # A 和 B 有关系,不应作为独立实体出现 # C 无关系,应出现 assert "存在Workflow'C'" in result lines = result.strip().split("\n") assert len(lines) == 2 # 一条关系 + 一个独立实体 # --------------------------------------------------------------------------- # build_context 测试 # --------------------------------------------------------------------------- class TestBuildContext: """build_context 函数的测试。""" def test_both_vector_and_graph(self): chunks = [ VectorChunk(id="1", text="文档片段一", score=0.9), VectorChunk(id="2", text="文档片段二", score=0.8), ] graph_text = "Dataset'用户数据'包含字段Field'user_id'" result = build_context(chunks, graph_text) assert "## 相关文档" in result assert "[1] 文档片段一" in result assert "[2] 文档片段二" in result assert "## 知识图谱上下文" in result assert graph_text in result def test_vector_only(self): chunks = [VectorChunk(id="1", text="文档片段", score=0.9)] result = build_context(chunks, "") assert "## 相关文档" in result assert "## 知识图谱上下文" not in result def test_graph_only(self): result = build_context([], "图谱内容") assert "## 知识图谱上下文" in result assert "## 相关文档" not in result def test_empty_both(self): result = build_context([], "") assert "未检索到相关上下文信息" in result def test_context_section_order(self): """验证文档在图谱之前。""" chunks = [VectorChunk(id="1", text="doc", score=0.9)] result = build_context(chunks, "graph") doc_pos = result.index("## 相关文档") graph_pos = result.index("## 知识图谱上下文") assert doc_pos < graph_pos