"""知识图谱三元组抽取数据模型。""" from __future__ import annotations from pydantic import BaseModel, Field class GraphNode(BaseModel): """图谱节点(实体)。""" name: str = Field(..., description="实体名称") type: str = Field(..., description="实体类型, 如 Person, Organization, Location") properties: dict[str, object] = Field(default_factory=dict, description="扩展属性") class GraphEdge(BaseModel): """图谱边(关系)。""" source: str = Field(..., description="源实体名称") target: str = Field(..., description="目标实体名称") relation_type: str = Field(..., description="关系类型, 如 works_at, located_in") properties: dict[str, object] = Field(default_factory=dict, description="关系属性") class Triple(BaseModel): """知识三元组: (主体, 关系, 客体)。""" subject: GraphNode predicate: str = Field(..., description="关系类型") object: GraphNode class EntityTypeConstraint(BaseModel): """实体类型约束,用于 Schema-guided 抽取。""" name: str = Field(..., description="类型名称") description: str = Field(default="", description="类型说明") class RelationTypeConstraint(BaseModel): """关系类型约束。""" name: str = Field(..., description="关系类型名称") source_types: list[str] = Field(default_factory=list, description="允许的源实体类型") target_types: list[str] = Field(default_factory=list, description="允许的目标实体类型") description: str = Field(default="", description="关系说明") class ExtractionSchema(BaseModel): """抽取 schema 约束,约束 LLM 输出的实体和关系类型范围。""" entity_types: list[EntityTypeConstraint] = Field(default_factory=list) relation_types: list[RelationTypeConstraint] = Field(default_factory=list) class ExtractionRequest(BaseModel): """三元组抽取请求。""" text: str = Field(..., description="待抽取的文本") graph_id: str = Field(..., description="目标图谱 ID") schema: ExtractionSchema | None = Field( default=None, description="可选的 schema 约束, 提供后做 schema-guided 抽取" ) source_id: str | None = Field(default=None, description="来源 ID(数据集/知识库条目)") source_type: str = Field(default="KNOWLEDGE_BASE", description="来源类型") class ExtractionResult(BaseModel): """三元组抽取结果。""" nodes: list[GraphNode] = Field(default_factory=list) edges: list[GraphEdge] = Field(default_factory=list) triples: list[Triple] = Field(default_factory=list) raw_text: str = Field(default="", description="原始文本") source_id: str | None = None