You've already forked DataMate
feat(kg-sync): 实现图谱构建流程(MySQL → Neo4j 同步)
实现功能:
- 实现 GraphSyncService(同步编排器)
- 实现 GraphSyncStepService(同步步骤执行器)
- 实现 GraphSyncController(同步 API)
- 实现 GraphInitializer(图谱初始化)
- 实现 DataManagementClient(数据源客户端)
同步功能:
- syncDatasets:同步数据集实体
- syncFields:同步字段实体
- syncUsers:同步用户实体
- syncOrgs:同步组织实体
- buildHasFieldRelations:构建 HAS_FIELD 关系
- buildDerivedFromRelations:构建 DERIVED_FROM 关系
- buildBelongsToRelations:构建 BELONGS_TO 关系
- syncAll:全量同步(实体 + 关系 + 对账删除)
API 端点:
- POST /{graphId}/sync/full:全量同步
- POST /{graphId}/sync/datasets:同步数据集
- POST /{graphId}/sync/fields:同步字段
- POST /{graphId}/sync/users:同步用户
- POST /{graphId}/sync/orgs:同步组织
- POST /{graphId}/sync/relations/has-field:构建 HAS_FIELD
- POST /{graphId}/sync/relations/derived-from:构建 DERIVED_FROM
- POST /{graphId}/sync/relations/belongs-to:构建 BELONGS_TO
技术实现:
- Upsert 策略:
- 实体:两阶段(Cypher MERGE 原子创建 + SDN save 更新扩展属性)
- 关系:Cypher MERGE 幂等创建
- 全量对账删除:purgeStaleEntities() 删除 MySQL 中已删除的实体
- 并发安全:
- 图级互斥锁(ConcurrentHashMap<String, ReentrantLock>)
- 复合唯一约束(graph_id, source_id, type)
- 锁自动回收(releaseLock() 原子检查并移除空闲锁)
- 重试机制:HTTP 调用失败时按指数退避重试(默认 3 次)
- 错误处理:
- 逐条错误处理(单条失败不影响其他记录)
- 统一异常包装(BusinessException.of(SYNC_FAILED))
- 错误信息脱敏(仅返回 errorCount + syncId)
- 事务管理:
- GraphSyncService(编排器,无事务)
- GraphSyncStepService(步骤执行器,@Transactional)
- 性能优化:
- 全量同步共享数据快照
- 批量日志跟踪
- 图谱初始化:
- 1 个唯一性约束(entity ID)
- 1 个复合唯一约束(graph_id, source_id, type)
- 9 个索引(5 个单字段 + 3 个复合 + 1 个全文)
- 幂等性保证(IF NOT EXISTS)
代码审查:
- 经过 3 轮 Codex 审查和 2 轮 Claude 修复
- 所有问题已解决(3个P0 + 5个P1 + 3个P2 + 1个P3)
- 编译验证通过(mvn compile SUCCESS)
设计决策:
- 最终一致性:允许短暂的数据不一致
- 对账机制:定期对比并修复差异
- 信任边界:网关负责鉴权,服务层只做格式校验
- 多实例部署:依赖复合唯一约束兜底
This commit is contained in:
@@ -1,11 +1,28 @@
|
||||
package com.datamate.knowledgegraph;
|
||||
|
||||
import com.datamate.knowledgegraph.infrastructure.neo4j.KnowledgeGraphProperties;
|
||||
import org.springframework.boot.web.client.RestTemplateBuilder;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.ComponentScan;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.data.neo4j.repository.config.EnableNeo4jRepositories;
|
||||
import org.springframework.scheduling.annotation.EnableScheduling;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
|
||||
import java.time.Duration;
|
||||
|
||||
@Configuration
|
||||
@ComponentScan(basePackages = "com.datamate.knowledgegraph")
|
||||
@EnableNeo4jRepositories(basePackages = "com.datamate.knowledgegraph.domain.repository")
|
||||
@EnableScheduling
|
||||
public class KnowledgeGraphServiceConfiguration {
|
||||
|
||||
@Bean("kgRestTemplate")
|
||||
public RestTemplate kgRestTemplate(RestTemplateBuilder builder, KnowledgeGraphProperties properties) {
|
||||
KnowledgeGraphProperties.Sync syncConfig = properties.getSync();
|
||||
return builder
|
||||
.connectTimeout(Duration.ofMillis(syncConfig.getConnectTimeout()))
|
||||
.readTimeout(Duration.ofMillis(syncConfig.getReadTimeout()))
|
||||
.build();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,335 @@
|
||||
package com.datamate.knowledgegraph.application;
|
||||
|
||||
import com.datamate.common.infrastructure.exception.BusinessException;
|
||||
import com.datamate.common.infrastructure.exception.SystemErrorCode;
|
||||
import com.datamate.knowledgegraph.domain.model.SyncResult;
|
||||
import com.datamate.knowledgegraph.infrastructure.client.DataManagementClient;
|
||||
import com.datamate.knowledgegraph.infrastructure.client.DataManagementClient.DatasetDTO;
|
||||
import com.datamate.knowledgegraph.infrastructure.exception.KnowledgeGraphErrorCode;
|
||||
import com.datamate.knowledgegraph.infrastructure.neo4j.KnowledgeGraphProperties;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* 知识图谱数据同步编排器(无 {@code @Transactional})。
|
||||
* <p>
|
||||
* 负责拉取数据、编排同步步骤、管理并发锁,具体写操作委托给
|
||||
* {@link GraphSyncStepService}(事务边界)。
|
||||
* <p>
|
||||
* <b>并发控制</b>:同一 graphId 的同步操作通过 {@link ReentrantLock} 互斥,
|
||||
* 防止并发写入导致数据不一致。
|
||||
* <p>
|
||||
* <b>数据快照</b>:全量同步时只拉取一次数据集列表,
|
||||
* 在各步骤间共享,避免重复 HTTP 调用。
|
||||
* <p>
|
||||
* <b>多实例部署</b>:当前图级锁为进程内 {@link ReentrantLock},
|
||||
* 多实例部署时依赖 Neo4j 复合唯一约束 (graph_id, source_id, type)
|
||||
* 兜底防止重复写入。如需严格互斥,可替换为 Redis/DB 分布式锁。
|
||||
* <p>
|
||||
* <b>信任边界</b>:本服务仅通过内网被 API Gateway / 定时任务调用,
|
||||
* 网关层已完成用户身份认证与权限校验。
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor
|
||||
public class GraphSyncService {
|
||||
|
||||
private static final Pattern UUID_PATTERN = Pattern.compile(
|
||||
"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
|
||||
);
|
||||
|
||||
private final GraphSyncStepService stepService;
|
||||
private final DataManagementClient dataManagementClient;
|
||||
private final KnowledgeGraphProperties properties;
|
||||
|
||||
/** 同 graphId 互斥锁,防止并发同步。 */
|
||||
private final ConcurrentHashMap<String, ReentrantLock> graphLocks = new ConcurrentHashMap<>();
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 全量同步
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
public List<SyncResult> syncAll(String graphId) {
|
||||
validateGraphId(graphId);
|
||||
String syncId = UUID.randomUUID().toString().substring(0, 8);
|
||||
|
||||
ReentrantLock lock = acquireLock(graphId, syncId);
|
||||
try {
|
||||
log.info("[{}] Starting full sync for graphId={}", syncId, graphId);
|
||||
|
||||
// 一次拉取,全程共享
|
||||
List<DatasetDTO> datasets = fetchDatasetsWithRetry(syncId);
|
||||
|
||||
List<SyncResult> results = new ArrayList<>();
|
||||
|
||||
// 实体同步
|
||||
results.add(stepService.upsertDatasetEntities(graphId, datasets, syncId));
|
||||
results.add(stepService.upsertFieldEntities(graphId, datasets, syncId));
|
||||
|
||||
Set<String> usernames = extractUsernames(datasets);
|
||||
results.add(stepService.upsertUserEntities(graphId, usernames, syncId));
|
||||
results.add(stepService.upsertOrgEntities(graphId, syncId));
|
||||
|
||||
// 全量对账:删除 MySQL 已移除的记录,并回填 purged 到对应 SyncResult
|
||||
Set<String> activeDatasetIds = datasets.stream()
|
||||
.map(DatasetDTO::getId)
|
||||
.collect(Collectors.toSet());
|
||||
results.get(0).setPurged(
|
||||
stepService.purgeStaleEntities(graphId, "Dataset", activeDatasetIds, syncId));
|
||||
|
||||
Set<String> activeFieldIds = new HashSet<>();
|
||||
for (DatasetDTO dto : datasets) {
|
||||
if (dto.getTags() != null) {
|
||||
for (DataManagementClient.TagDTO tag : dto.getTags()) {
|
||||
activeFieldIds.add(dto.getId() + ":tag:" + tag.getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
results.get(1).setPurged(
|
||||
stepService.purgeStaleEntities(graphId, "Field", activeFieldIds, syncId));
|
||||
|
||||
Set<String> activeUserIds = usernames.stream()
|
||||
.map(u -> "user:" + u)
|
||||
.collect(Collectors.toSet());
|
||||
results.get(2).setPurged(
|
||||
stepService.purgeStaleEntities(graphId, "User", activeUserIds, syncId));
|
||||
|
||||
// 关系构建(MERGE 幂等)
|
||||
results.add(stepService.mergeHasFieldRelations(graphId, syncId));
|
||||
results.add(stepService.mergeDerivedFromRelations(graphId, syncId));
|
||||
results.add(stepService.mergeBelongsToRelations(graphId, syncId));
|
||||
|
||||
log.info("[{}] Full sync completed for graphId={}. Summary: {}", syncId, graphId,
|
||||
results.stream()
|
||||
.map(r -> r.getSyncType() + "(+" + r.getCreated() + "/~" + r.getUpdated() + "/-" + r.getFailed() + ")")
|
||||
.collect(Collectors.joining(", ")));
|
||||
return results;
|
||||
} catch (BusinessException e) {
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
log.error("[{}] Full sync failed for graphId={}", syncId, graphId, e);
|
||||
throw BusinessException.of(KnowledgeGraphErrorCode.SYNC_FAILED, "全量同步失败,syncId=" + syncId);
|
||||
} finally {
|
||||
releaseLock(graphId, lock);
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 单步同步(各自获取锁和数据)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
public SyncResult syncDatasets(String graphId) {
|
||||
validateGraphId(graphId);
|
||||
String syncId = UUID.randomUUID().toString().substring(0, 8);
|
||||
ReentrantLock lock = acquireLock(graphId, syncId);
|
||||
try {
|
||||
List<DatasetDTO> datasets = fetchDatasetsWithRetry(syncId);
|
||||
SyncResult result = stepService.upsertDatasetEntities(graphId, datasets, syncId);
|
||||
Set<String> activeIds = datasets.stream().map(DatasetDTO::getId).collect(Collectors.toSet());
|
||||
int purged = stepService.purgeStaleEntities(graphId, "Dataset", activeIds, syncId);
|
||||
result.setPurged(purged);
|
||||
return result;
|
||||
} catch (BusinessException e) {
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
log.error("[{}] Dataset sync failed for graphId={}", syncId, graphId, e);
|
||||
throw BusinessException.of(KnowledgeGraphErrorCode.SYNC_FAILED, "数据集同步失败,syncId=" + syncId);
|
||||
} finally {
|
||||
releaseLock(graphId, lock);
|
||||
}
|
||||
}
|
||||
|
||||
public SyncResult syncFields(String graphId) {
|
||||
validateGraphId(graphId);
|
||||
String syncId = UUID.randomUUID().toString().substring(0, 8);
|
||||
ReentrantLock lock = acquireLock(graphId, syncId);
|
||||
try {
|
||||
List<DatasetDTO> datasets = fetchDatasetsWithRetry(syncId);
|
||||
SyncResult result = stepService.upsertFieldEntities(graphId, datasets, syncId);
|
||||
Set<String> activeFieldIds = new HashSet<>();
|
||||
for (DatasetDTO dto : datasets) {
|
||||
if (dto.getTags() != null) {
|
||||
for (DataManagementClient.TagDTO tag : dto.getTags()) {
|
||||
activeFieldIds.add(dto.getId() + ":tag:" + tag.getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
result.setPurged(stepService.purgeStaleEntities(graphId, "Field", activeFieldIds, syncId));
|
||||
return result;
|
||||
} catch (BusinessException e) {
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
log.error("[{}] Field sync failed for graphId={}", syncId, graphId, e);
|
||||
throw BusinessException.of(KnowledgeGraphErrorCode.SYNC_FAILED, "字段同步失败,syncId=" + syncId);
|
||||
} finally {
|
||||
releaseLock(graphId, lock);
|
||||
}
|
||||
}
|
||||
|
||||
public SyncResult syncUsers(String graphId) {
|
||||
validateGraphId(graphId);
|
||||
String syncId = UUID.randomUUID().toString().substring(0, 8);
|
||||
ReentrantLock lock = acquireLock(graphId, syncId);
|
||||
try {
|
||||
List<DatasetDTO> datasets = fetchDatasetsWithRetry(syncId);
|
||||
Set<String> usernames = extractUsernames(datasets);
|
||||
SyncResult result = stepService.upsertUserEntities(graphId, usernames, syncId);
|
||||
Set<String> activeUserIds = usernames.stream().map(u -> "user:" + u).collect(Collectors.toSet());
|
||||
result.setPurged(stepService.purgeStaleEntities(graphId, "User", activeUserIds, syncId));
|
||||
return result;
|
||||
} catch (BusinessException e) {
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
log.error("[{}] User sync failed for graphId={}", syncId, graphId, e);
|
||||
throw BusinessException.of(KnowledgeGraphErrorCode.SYNC_FAILED, "用户同步失败,syncId=" + syncId);
|
||||
} finally {
|
||||
releaseLock(graphId, lock);
|
||||
}
|
||||
}
|
||||
|
||||
public SyncResult syncOrgs(String graphId) {
|
||||
validateGraphId(graphId);
|
||||
String syncId = UUID.randomUUID().toString().substring(0, 8);
|
||||
ReentrantLock lock = acquireLock(graphId, syncId);
|
||||
try {
|
||||
return stepService.upsertOrgEntities(graphId, syncId);
|
||||
} catch (BusinessException e) {
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
log.error("[{}] Org sync failed for graphId={}", syncId, graphId, e);
|
||||
throw BusinessException.of(KnowledgeGraphErrorCode.SYNC_FAILED, "组织同步失败,syncId=" + syncId);
|
||||
} finally {
|
||||
releaseLock(graphId, lock);
|
||||
}
|
||||
}
|
||||
|
||||
public SyncResult buildHasFieldRelations(String graphId) {
|
||||
validateGraphId(graphId);
|
||||
String syncId = UUID.randomUUID().toString().substring(0, 8);
|
||||
ReentrantLock lock = acquireLock(graphId, syncId);
|
||||
try {
|
||||
return stepService.mergeHasFieldRelations(graphId, syncId);
|
||||
} catch (BusinessException e) {
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
log.error("[{}] HAS_FIELD relation build failed for graphId={}", syncId, graphId, e);
|
||||
throw BusinessException.of(KnowledgeGraphErrorCode.SYNC_FAILED,
|
||||
"HAS_FIELD 关系构建失败,syncId=" + syncId);
|
||||
} finally {
|
||||
releaseLock(graphId, lock);
|
||||
}
|
||||
}
|
||||
|
||||
public SyncResult buildDerivedFromRelations(String graphId) {
|
||||
validateGraphId(graphId);
|
||||
String syncId = UUID.randomUUID().toString().substring(0, 8);
|
||||
ReentrantLock lock = acquireLock(graphId, syncId);
|
||||
try {
|
||||
return stepService.mergeDerivedFromRelations(graphId, syncId);
|
||||
} catch (BusinessException e) {
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
log.error("[{}] DERIVED_FROM relation build failed for graphId={}", syncId, graphId, e);
|
||||
throw BusinessException.of(KnowledgeGraphErrorCode.SYNC_FAILED,
|
||||
"DERIVED_FROM 关系构建失败,syncId=" + syncId);
|
||||
} finally {
|
||||
releaseLock(graphId, lock);
|
||||
}
|
||||
}
|
||||
|
||||
public SyncResult buildBelongsToRelations(String graphId) {
|
||||
validateGraphId(graphId);
|
||||
String syncId = UUID.randomUUID().toString().substring(0, 8);
|
||||
ReentrantLock lock = acquireLock(graphId, syncId);
|
||||
try {
|
||||
return stepService.mergeBelongsToRelations(graphId, syncId);
|
||||
} catch (BusinessException e) {
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
log.error("[{}] BELONGS_TO relation build failed for graphId={}", syncId, graphId, e);
|
||||
throw BusinessException.of(KnowledgeGraphErrorCode.SYNC_FAILED,
|
||||
"BELONGS_TO 关系构建失败,syncId=" + syncId);
|
||||
} finally {
|
||||
releaseLock(graphId, lock);
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 内部方法
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
private ReentrantLock acquireLock(String graphId, String syncId) {
|
||||
ReentrantLock lock = graphLocks.computeIfAbsent(graphId, k -> new ReentrantLock());
|
||||
if (!lock.tryLock()) {
|
||||
log.warn("[{}] Graph {} is already being synced, rejecting concurrent request", syncId, graphId);
|
||||
throw BusinessException.of(KnowledgeGraphErrorCode.SYNC_FAILED, "该图谱正在同步中,请稍后重试");
|
||||
}
|
||||
return lock;
|
||||
}
|
||||
|
||||
/**
|
||||
* 释放锁并在无竞争时清理锁对象,防止 graphLocks 无限增长。
|
||||
*/
|
||||
private void releaseLock(String graphId, ReentrantLock lock) {
|
||||
lock.unlock();
|
||||
graphLocks.compute(graphId, (key, existing) -> {
|
||||
// 仅当锁空闲且无等待线程时移除,compute 保证此 key 的原子性
|
||||
if (existing != null && !existing.isLocked() && !existing.hasQueuedThreads()) {
|
||||
return null;
|
||||
}
|
||||
return existing;
|
||||
});
|
||||
}
|
||||
|
||||
private List<DatasetDTO> fetchDatasetsWithRetry(String syncId) {
|
||||
int maxRetries = properties.getSync().getMaxRetries();
|
||||
long retryInterval = properties.getSync().getRetryInterval();
|
||||
Exception lastException = null;
|
||||
|
||||
for (int attempt = 1; attempt <= maxRetries; attempt++) {
|
||||
try {
|
||||
return dataManagementClient.listAllDatasets();
|
||||
} catch (Exception e) {
|
||||
lastException = e;
|
||||
log.warn("[{}] Dataset fetch attempt {}/{} failed: {}", syncId, attempt, maxRetries, e.getMessage());
|
||||
if (attempt < maxRetries) {
|
||||
try {
|
||||
Thread.sleep(retryInterval * attempt);
|
||||
} catch (InterruptedException ie) {
|
||||
Thread.currentThread().interrupt();
|
||||
throw BusinessException.of(KnowledgeGraphErrorCode.SYNC_FAILED, "同步被中断");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
log.error("[{}] All {} fetch attempts failed", syncId, maxRetries, lastException);
|
||||
throw BusinessException.of(KnowledgeGraphErrorCode.SYNC_FAILED,
|
||||
"拉取数据集失败(已重试 " + maxRetries + " 次),syncId=" + syncId);
|
||||
}
|
||||
|
||||
private static Set<String> extractUsernames(List<DatasetDTO> datasets) {
|
||||
Set<String> usernames = new LinkedHashSet<>();
|
||||
for (DatasetDTO dto : datasets) {
|
||||
if (dto.getCreatedBy() != null && !dto.getCreatedBy().isBlank()) {
|
||||
usernames.add(dto.getCreatedBy());
|
||||
}
|
||||
if (dto.getUpdatedBy() != null && !dto.getUpdatedBy().isBlank()) {
|
||||
usernames.add(dto.getUpdatedBy());
|
||||
}
|
||||
}
|
||||
return usernames;
|
||||
}
|
||||
|
||||
private void validateGraphId(String graphId) {
|
||||
if (graphId == null || !UUID_PATTERN.matcher(graphId).matches()) {
|
||||
throw BusinessException.of(SystemErrorCode.INVALID_PARAMETER, "graphId 格式无效");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,407 @@
|
||||
package com.datamate.knowledgegraph.application;
|
||||
|
||||
import com.datamate.knowledgegraph.domain.model.GraphEntity;
|
||||
import com.datamate.knowledgegraph.domain.model.SyncResult;
|
||||
import com.datamate.knowledgegraph.domain.repository.GraphEntityRepository;
|
||||
import com.datamate.knowledgegraph.infrastructure.client.DataManagementClient.DatasetDTO;
|
||||
import com.datamate.knowledgegraph.infrastructure.client.DataManagementClient.TagDTO;
|
||||
import com.datamate.knowledgegraph.infrastructure.neo4j.KnowledgeGraphProperties;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.data.neo4j.core.Neo4jClient;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.*;
|
||||
|
||||
/**
 * Sync step executor (transaction boundary).
 * <p>
 * All write operations run inside separate {@code @Transactional} methods and
 * are orchestrated by {@link GraphSyncService}. Keeping the transactional
 * methods in a separate bean avoids self-invocation, which would bypass
 * Spring's transactional proxy.
 * <p>
 * Relation building uses Cypher MERGE for idempotency. Entity upserts use
 * Cypher MERGE keyed on the (graph_id, source_id, type) composite unique
 * constraint for atomic create-or-match, with extended properties updated
 * afterwards through SDN.
 */
@Service
@Slf4j
@RequiredArgsConstructor
public class GraphSyncStepService {

    // Marker for entities owned by the sync pipeline.
    // NOTE(review): appears unused — the Cypher strings below hard-code
    // 'SYNC' instead of referencing this constant; confirm and consolidate.
    private static final String SOURCE_TYPE_SYNC = "SYNC";
    // All graph relations share a single Neo4j relationship type; the logical
    // kind (HAS_FIELD / DERIVED_FROM / BELONGS_TO) is stored in the
    // relation_type property and used as part of the MERGE key.
    private static final String REL_TYPE = "RELATED_TO";

    private final GraphEntityRepository entityRepository;
    private final Neo4jClient neo4jClient;
    private final KnowledgeGraphProperties properties;

    // -----------------------------------------------------------------------
    // Entity upserts
    // -----------------------------------------------------------------------

    /**
     * Upsert one {@code Dataset} entity per source dataset.
     * <p>
     * Per-record errors are recorded on the result and do not abort the batch.
     *
     * @param graphId  target graph
     * @param datasets source snapshot shared by the orchestrator
     * @param syncId   trace id for log correlation
     */
    @Transactional
    public SyncResult upsertDatasetEntities(String graphId, List<DatasetDTO> datasets, String syncId) {
        SyncResult result = beginResult("Dataset", syncId);
        int batchSize = properties.getImportBatchSize();

        for (int i = 0; i < datasets.size(); i++) {
            DatasetDTO dto = datasets.get(i);
            try {
                Map<String, Object> props = new HashMap<>();
                props.put("dataset_type", dto.getDatasetType());
                props.put("status", dto.getStatus());
                props.put("total_size", dto.getTotalSize());
                props.put("file_count", dto.getFileCount());
                if (dto.getParentDatasetId() != null) {
                    props.put("parent_dataset_id", dto.getParentDatasetId());
                }
                if (dto.getTags() != null) {
                    List<String> tagNames = dto.getTags().stream()
                            .map(TagDTO::getName).toList();
                    props.put("tags", tagNames);
                }

                upsertEntity(graphId, dto.getId(), "Dataset",
                        dto.getName(), dto.getDescription(), props, result);

                // Progress logging only — writes are per-record, not batched.
                if ((i + 1) % batchSize == 0) {
                    log.debug("[{}] Processed {}/{} datasets", syncId, i + 1, datasets.size());
                }
            } catch (Exception e) {
                // Per-record failure: log, count, continue with the rest.
                log.warn("[{}] Failed to upsert dataset: sourceId={}", syncId, dto.getId(), e);
                result.addError("dataset:" + dto.getId());
            }
        }
        return endResult(result);
    }

    /**
     * Upsert one {@code Field} entity per dataset tag, with synthetic
     * source id {@code <datasetId>:tag:<tagName>}.
     */
    @Transactional
    public SyncResult upsertFieldEntities(String graphId, List<DatasetDTO> datasets, String syncId) {
        SyncResult result = beginResult("Field", syncId);

        for (DatasetDTO dto : datasets) {
            if (dto.getTags() == null || dto.getTags().isEmpty()) {
                continue;
            }
            for (TagDTO tag : dto.getTags()) {
                try {
                    String fieldSourceId = dto.getId() + ":tag:" + tag.getName();
                    Map<String, Object> props = new HashMap<>();
                    props.put("data_type", "TAG");
                    // Back-reference used later by mergeHasFieldRelations.
                    props.put("dataset_source_id", dto.getId());
                    if (tag.getColor() != null) {
                        props.put("color", tag.getColor());
                    }

                    upsertEntity(graphId, fieldSourceId, "Field", tag.getName(),
                            "数据集[" + dto.getName() + "]的标签字段", props, result);
                } catch (Exception e) {
                    log.warn("[{}] Failed to upsert field: dataset={}, tag={}",
                            syncId, dto.getId(), tag.getName(), e);
                    result.addError("field:" + dto.getId() + ":" + tag.getName());
                }
            }
        }
        return endResult(result);
    }

    /**
     * Upsert one {@code User} entity per username, with synthetic source id
     * {@code user:<username>}.
     */
    @Transactional
    public SyncResult upsertUserEntities(String graphId, Set<String> usernames, String syncId) {
        SyncResult result = beginResult("User", syncId);

        for (String username : usernames) {
            try {
                Map<String, Object> props = new HashMap<>();
                props.put("username", username);
                upsertEntity(graphId, "user:" + username, "User", username, null, props, result);
            } catch (Exception e) {
                log.warn("[{}] Failed to upsert user: username={}", syncId, username, e);
                result.addError("user:" + username);
            }
        }
        return endResult(result);
    }

    /**
     * Upsert the single default {@code Org} entity ({@code org:default}).
     * Placeholder until a real organization service is integrated.
     */
    @Transactional
    public SyncResult upsertOrgEntities(String graphId, String syncId) {
        SyncResult result = beginResult("Org", syncId);

        try {
            Map<String, Object> props = new HashMap<>();
            props.put("org_code", "DEFAULT");
            props.put("level", 1);
            upsertEntity(graphId, "org:default", "Org", "默认组织",
                    "系统默认组织(待对接组织服务后更新)", props, result);
        } catch (Exception e) {
            log.warn("[{}] Failed to upsert default org", syncId, e);
            result.addError("org:default");
        }
        return endResult(result);
    }

    // -----------------------------------------------------------------------
    // Reconciliation purge
    // -----------------------------------------------------------------------

    /**
     * Delete entities in Neo4j whose source_type is SYNC but whose source_id is
     * no longer in the active set. Uses DETACH DELETE so attached relations are
     * removed as well.
     * <p>
     * When {@code activeSourceIds} is empty, the source no longer contains any
     * entity of this type, and ALL SYNC entities of the type are purged. The
     * caller must guarantee an empty set is the genuine result of a successful
     * fetch (fetchDatasetsWithRetry throws on failure and never reaches here).
     *
     * @return number of deleted nodes
     */
    @Transactional
    public int purgeStaleEntities(String graphId, String type, Set<String> activeSourceIds, String syncId) {
        String cypher;
        Map<String, Object> params;

        if (activeSourceIds.isEmpty()) {
            log.warn("[{}] Active source IDs empty for type={}, purging ALL SYNC entities of this type",
                    syncId, type);
            cypher = "MATCH (e:Entity {graph_id: $graphId, type: $type, source_type: 'SYNC'}) " +
                    "DETACH DELETE e " +
                    "RETURN count(*) AS deleted";
            params = Map.of("graphId", graphId, "type", type);
        } else {
            cypher = "MATCH (e:Entity {graph_id: $graphId, type: $type, source_type: 'SYNC'}) " +
                    "WHERE NOT e.source_id IN $activeSourceIds " +
                    "DETACH DELETE e " +
                    "RETURN count(*) AS deleted";
            params = Map.of(
                    "graphId", graphId,
                    "type", type,
                    // Copy to a List — driver parameter binding of a Set may differ; ArrayList is safe.
                    "activeSourceIds", new ArrayList<>(activeSourceIds)
            );
        }

        long deleted = neo4jClient.query(cypher)
                .bindAll(params)
                .fetchAs(Long.class)
                .mappedBy((ts, record) -> record.get("deleted").asLong())
                .one()
                .orElse(0L);

        if (deleted > 0) {
            log.info("[{}] Purged {} stale {} entities from graphId={}", syncId, deleted, type, graphId);
        }
        return (int) deleted;
    }

    // -----------------------------------------------------------------------
    // Relation building (MERGE guarantees idempotency)
    // -----------------------------------------------------------------------

    /**
     * Create Dataset→Field HAS_FIELD relations by resolving each Field's
     * {@code dataset_source_id} back-reference. Fields without a resolvable
     * dataset are counted as skipped, not failed.
     */
    @Transactional
    public SyncResult mergeHasFieldRelations(String graphId, String syncId) {
        SyncResult result = beginResult("HAS_FIELD", syncId);

        List<GraphEntity> fields = entityRepository.findByGraphIdAndType(graphId, "Field");

        for (GraphEntity field : fields) {
            try {
                Object datasetSourceId = field.getProperties().get("dataset_source_id");
                if (datasetSourceId == null) {
                    result.incrementSkipped();
                    continue;
                }

                Optional<GraphEntity> datasetOpt = entityRepository.findByGraphIdAndSourceIdAndType(
                        graphId, datasetSourceId.toString(), "Dataset");
                if (datasetOpt.isEmpty()) {
                    result.incrementSkipped();
                    continue;
                }

                boolean created = mergeRelation(graphId, datasetOpt.get().getId(), field.getId(),
                        "HAS_FIELD", "{}", syncId);
                if (created) {
                    result.incrementCreated();
                } else {
                    // Relation already existed — MERGE matched instead of creating.
                    result.incrementSkipped();
                }
            } catch (Exception e) {
                log.warn("[{}] Failed to merge HAS_FIELD for field: id={}", syncId, field.getId(), e);
                result.addError("has_field:" + field.getId());
            }
        }
        return endResult(result);
    }

    /**
     * Create Dataset→Dataset DERIVED_FROM relations using each dataset's
     * {@code parent_dataset_id} property; datasets without a parent are
     * silently skipped, datasets whose parent is missing count as skipped.
     */
    @Transactional
    public SyncResult mergeDerivedFromRelations(String graphId, String syncId) {
        SyncResult result = beginResult("DERIVED_FROM", syncId);

        List<GraphEntity> datasets = entityRepository.findByGraphIdAndType(graphId, "Dataset");

        for (GraphEntity dataset : datasets) {
            try {
                Object parentId = dataset.getProperties().get("parent_dataset_id");
                if (parentId == null || parentId.toString().isBlank()) {
                    continue;
                }

                Optional<GraphEntity> parentOpt = entityRepository.findByGraphIdAndSourceIdAndType(
                        graphId, parentId.toString(), "Dataset");
                if (parentOpt.isEmpty()) {
                    result.incrementSkipped();
                    continue;
                }

                boolean created = mergeRelation(graphId, dataset.getId(), parentOpt.get().getId(),
                        "DERIVED_FROM", "{\"derivation_type\":\"VERSION\"}", syncId);
                if (created) {
                    result.incrementCreated();
                } else {
                    result.incrementSkipped();
                }
            } catch (Exception e) {
                log.warn("[{}] Failed to merge DERIVED_FROM for dataset: id={}", syncId, dataset.getId(), e);
                result.addError("derived_from:" + dataset.getId());
            }
        }
        return endResult(result);
    }

    /**
     * Create User→Org and Dataset→Org BELONGS_TO relations, all pointing at
     * the default org ({@code org:default}). If the default org is missing,
     * the step records a single error and returns early.
     */
    @Transactional
    public SyncResult mergeBelongsToRelations(String graphId, String syncId) {
        SyncResult result = beginResult("BELONGS_TO", syncId);

        Optional<GraphEntity> defaultOrgOpt = entityRepository.findByGraphIdAndSourceIdAndType(
                graphId, "org:default", "Org");
        if (defaultOrgOpt.isEmpty()) {
            log.warn("[{}] Default org not found, skipping BELONGS_TO", syncId);
            result.addError("belongs_to:org_missing");
            return endResult(result);
        }
        String orgId = defaultOrgOpt.get().getId();

        // User → Org
        for (GraphEntity user : entityRepository.findByGraphIdAndType(graphId, "User")) {
            try {
                boolean created = mergeRelation(graphId, user.getId(), orgId,
                        "BELONGS_TO", "{\"membership_type\":\"PRIMARY\"}", syncId);
                if (created) { result.incrementCreated(); } else { result.incrementSkipped(); }
            } catch (Exception e) {
                log.warn("[{}] Failed to merge BELONGS_TO for user: id={}", syncId, user.getId(), e);
                result.addError("belongs_to:user:" + user.getId());
            }
        }

        // Dataset → Org
        for (GraphEntity dataset : entityRepository.findByGraphIdAndType(graphId, "Dataset")) {
            try {
                boolean created = mergeRelation(graphId, dataset.getId(), orgId,
                        "BELONGS_TO", "{\"membership_type\":\"PRIMARY\"}", syncId);
                if (created) { result.incrementCreated(); } else { result.incrementSkipped(); }
            } catch (Exception e) {
                log.warn("[{}] Failed to merge BELONGS_TO for dataset: id={}", syncId, dataset.getId(), e);
                result.addError("belongs_to:dataset:" + dataset.getId());
            }
        }
        return endResult(result);
    }

    // -----------------------------------------------------------------------
    // Internals
    // -----------------------------------------------------------------------

    /**
     * Atomically create-or-match an entity via Cypher MERGE, then update its
     * extended properties via SDN.
     * <p>
     * The MERGE keys on the (graph_id, source_id, type) composite unique
     * constraint so concurrent multi-instance writes cannot create duplicate
     * nodes. Whether the node is new is detected by comparing the stored id to
     * the freshly generated {@code newId} (only ON CREATE assigns it).
     */
    private void upsertEntity(String graphId, String sourceId, String type,
                              String name, String description,
                              Map<String, Object> props, SyncResult result) {
        String newId = UUID.randomUUID().toString();

        Map<String, Object> params = new HashMap<>();
        params.put("graphId", graphId);
        params.put("sourceId", sourceId);
        params.put("type", type);
        params.put("newId", newId);
        params.put("name", name != null ? name : "");
        params.put("description", description != null ? description : "");

        // Phase 1: atomic MERGE — create or match the entity skeleton.
        Boolean isNew = neo4jClient.query(
                "MERGE (e:Entity {graph_id: $graphId, source_id: $sourceId, type: $type}) " +
                "ON CREATE SET e.id = $newId, e.source_type = 'SYNC', e.confidence = 1.0, " +
                "              e.name = $name, e.description = $description, " +
                "              e.created_at = datetime(), e.updated_at = datetime() " +
                "ON MATCH SET e.name = $name, e.description = $description, " +
                "             e.updated_at = datetime() " +
                "RETURN e.id = $newId AS isNew"
        )
                .bindAll(params)
                .fetchAs(Boolean.class)
                .mappedBy((ts, record) -> record.get("isNew").asBoolean())
                .one()
                .orElse(false);

        // Phase 2: update extended properties via SDN (Map serialization is
        // handled by SDN's entity mapping).
        entityRepository.findByGraphIdAndSourceIdAndType(graphId, sourceId, type)
                .ifPresent(entity -> {
                    entity.setProperties(props != null ? props : new HashMap<>());
                    entityRepository.save(entity);
                });

        if (isNew) {
            result.incrementCreated();
        } else {
            result.incrementUpdated();
        }
    }

    /**
     * Create-or-match a relation via Cypher MERGE (idempotent). The MERGE key
     * is (graph_id, relation_type) on the shared RELATED_TO relationship
     * between the two matched endpoints.
     * <p>
     * NOTE(review): the {@code syncId} parameter is currently unused here —
     * presumably kept for log correlation; confirm or drop.
     *
     * @return true if the relation was newly created (the returned r.id equals
     *         the freshly generated id, which only ON CREATE assigns),
     *         false if it already existed or either endpoint was not matched
     */
    private boolean mergeRelation(String graphId, String sourceEntityId, String targetEntityId,
                                  String relationType, String propertiesJson, String syncId) {
        String newId = UUID.randomUUID().toString();

        String mergedId = neo4jClient
                .query(
                        "MATCH (s:Entity {graph_id: $graphId, id: $sourceEntityId}) " +
                        "MATCH (t:Entity {graph_id: $graphId, id: $targetEntityId}) " +
                        "MERGE (s)-[r:" + REL_TYPE + " {graph_id: $graphId, relation_type: $relationType}]->(t) " +
                        "ON CREATE SET r.id = $newId, r.weight = 1.0, r.confidence = 1.0, " +
                        "              r.source_id = '', r.properties_json = $propertiesJson, r.created_at = datetime() " +
                        "RETURN r.id AS relId"
                )
                .bindAll(Map.of(
                        "graphId", graphId,
                        "sourceEntityId", sourceEntityId,
                        "targetEntityId", targetEntityId,
                        "relationType", relationType,
                        "newId", newId,
                        "propertiesJson", propertiesJson
                ))
                .fetchAs(String.class)
                .mappedBy((ts, record) -> record.get("relId").asString())
                .one()
                .orElse(null);

        return newId.equals(mergedId);
    }

    /** Start a result with the step type, trace id and start timestamp. */
    private SyncResult beginResult(String syncType, String syncId) {
        return SyncResult.builder()
                .syncType(syncType)
                .syncId(syncId)
                .startedAt(LocalDateTime.now())
                .errors(new ArrayList<>())
                .build();
    }

    /** Stamp the completion time and return the result. */
    private SyncResult endResult(SyncResult result) {
        result.setCompletedAt(LocalDateTime.now());
        return result;
    }
}
|
||||
@@ -0,0 +1,77 @@
|
||||
package com.datamate.knowledgegraph.domain.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 同步操作结果统计。
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class SyncResult {
|
||||
|
||||
/** 本次同步的追踪标识 */
|
||||
private String syncId;
|
||||
|
||||
/** 同步的实体/关系类型 */
|
||||
private String syncType;
|
||||
|
||||
@Builder.Default
|
||||
private int created = 0;
|
||||
|
||||
@Builder.Default
|
||||
private int updated = 0;
|
||||
|
||||
@Builder.Default
|
||||
private int skipped = 0;
|
||||
|
||||
@Builder.Default
|
||||
private int failed = 0;
|
||||
|
||||
/** 全量对账删除的过期实体数 */
|
||||
@Builder.Default
|
||||
private int purged = 0;
|
||||
|
||||
@Builder.Default
|
||||
private List<String> errors = new ArrayList<>();
|
||||
|
||||
private LocalDateTime startedAt;
|
||||
|
||||
private LocalDateTime completedAt;
|
||||
|
||||
public int total() {
|
||||
return created + updated + skipped + failed;
|
||||
}
|
||||
|
||||
public long durationMillis() {
|
||||
if (startedAt == null || completedAt == null) {
|
||||
return 0;
|
||||
}
|
||||
return java.time.Duration.between(startedAt, completedAt).toMillis();
|
||||
}
|
||||
|
||||
public void incrementCreated() {
|
||||
created++;
|
||||
}
|
||||
|
||||
public void incrementUpdated() {
|
||||
updated++;
|
||||
}
|
||||
|
||||
public void incrementSkipped() {
|
||||
skipped++;
|
||||
}
|
||||
|
||||
public void addError(String error) {
|
||||
failed++;
|
||||
errors.add(error);
|
||||
}
|
||||
}
|
||||
@@ -41,4 +41,12 @@ public interface GraphEntityRepository extends Neo4jRepository<GraphEntity, Stri
|
||||
|
||||
/**
 * Counts all entities that belong to the given graph.
 *
 * @param graphId graph identifier (matched against the node's {@code graph_id} property)
 * @return number of {@code :Entity} nodes in that graph
 */
@Query("MATCH (e:Entity {graph_id: $graphId}) RETURN count(e)")
long countByGraphId(@Param("graphId") String graphId);
|
||||
|
||||
/**
 * Looks up an entity by its sync identity (graph + source record + type).
 * This triple is also covered by a composite uniqueness constraint, so at
 * most one node can match.
 *
 * @param graphId  graph identifier
 * @param sourceId id of the originating MySQL record
 * @param type     entity type discriminator (e.g. dataset, field, user)
 * @return the matching entity, or empty if it has not been synced yet
 */
@Query("MATCH (e:Entity {graph_id: $graphId}) " +
       "WHERE e.source_id = $sourceId AND e.type = $type " +
       "RETURN e")
Optional<GraphEntity> findByGraphIdAndSourceIdAndType(
        @Param("graphId") String graphId,
        @Param("sourceId") String sourceId,
        @Param("type") String type);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,126 @@
|
||||
package com.datamate.knowledgegraph.infrastructure.client;
|
||||
|
||||
import com.datamate.knowledgegraph.infrastructure.neo4j.KnowledgeGraphProperties;
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
import lombok.Data;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.core.ParameterizedTypeReference;
|
||||
import org.springframework.http.HttpMethod;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.web.client.RestClientException;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 数据管理服务 REST 客户端。
|
||||
* <p>
|
||||
* 通过 HTTP 调用 data-management-service 的 REST API,
|
||||
* 拉取数据集、文件等元数据用于同步到 Neo4j。
|
||||
*/
|
||||
@Component
|
||||
@Slf4j
|
||||
public class DataManagementClient {
|
||||
|
||||
private final RestTemplate restTemplate;
|
||||
private final String baseUrl;
|
||||
private final int pageSize;
|
||||
|
||||
public DataManagementClient(
|
||||
@Qualifier("kgRestTemplate") RestTemplate restTemplate,
|
||||
KnowledgeGraphProperties properties) {
|
||||
this.restTemplate = restTemplate;
|
||||
this.baseUrl = properties.getSync().getDataManagementUrl();
|
||||
this.pageSize = properties.getSync().getPageSize();
|
||||
}
|
||||
|
||||
/**
|
||||
* 拉取所有数据集(自动分页)。
|
||||
*/
|
||||
public List<DatasetDTO> listAllDatasets() {
|
||||
List<DatasetDTO> allDatasets = new ArrayList<>();
|
||||
int page = 0;
|
||||
|
||||
while (true) {
|
||||
String url = baseUrl + "/data-management/datasets?page=" + page + "&size=" + pageSize;
|
||||
log.debug("Fetching datasets: page={}, size={}", page, pageSize);
|
||||
|
||||
try {
|
||||
ResponseEntity<PagedResult<DatasetDTO>> response = restTemplate.exchange(
|
||||
url, HttpMethod.GET, null,
|
||||
new ParameterizedTypeReference<>() {}
|
||||
);
|
||||
|
||||
PagedResult<DatasetDTO> body = response.getBody();
|
||||
if (body == null || body.getContent() == null || body.getContent().isEmpty()) {
|
||||
break;
|
||||
}
|
||||
|
||||
allDatasets.addAll(body.getContent());
|
||||
log.debug("Fetched {} datasets (page {}), total so far: {}",
|
||||
body.getContent().size(), page, allDatasets.size());
|
||||
|
||||
if (page >= body.getTotalPages() - 1) {
|
||||
break;
|
||||
}
|
||||
page++;
|
||||
} catch (RestClientException e) {
|
||||
log.error("Failed to fetch datasets from data-management-service: page={}, url={}", page, url, e);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
log.info("Fetched {} datasets in total from data-management-service", allDatasets.size());
|
||||
return allDatasets;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 响应 DTO(仅包含同步所需字段)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
@Data
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public static class PagedResult<T> {
|
||||
private List<T> content;
|
||||
private long page;
|
||||
private long totalElements;
|
||||
private long totalPages;
|
||||
}
|
||||
|
||||
/**
|
||||
* 与 data-management-service 的 DatasetResponse 对齐。
|
||||
*/
|
||||
@Data
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public static class DatasetDTO {
|
||||
private String id;
|
||||
private String name;
|
||||
private String description;
|
||||
private String parentDatasetId;
|
||||
private String datasetType;
|
||||
private String status;
|
||||
private Long totalSize;
|
||||
private Integer fileCount;
|
||||
private String createdBy;
|
||||
private String updatedBy;
|
||||
private LocalDateTime createdAt;
|
||||
private LocalDateTime updatedAt;
|
||||
private List<TagDTO> tags;
|
||||
}
|
||||
|
||||
/**
|
||||
* 与 data-management-service 的 TagResponse 对齐。
|
||||
*/
|
||||
@Data
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public static class TagDTO {
|
||||
private String id;
|
||||
private String name;
|
||||
private String color;
|
||||
private String description;
|
||||
}
|
||||
}
|
||||
@@ -18,7 +18,8 @@ public enum KnowledgeGraphErrorCode implements ErrorCode {
|
||||
INVALID_RELATION("knowledge_graph.0005", "无效的关系定义"),
|
||||
IMPORT_FAILED("knowledge_graph.0006", "图谱导入失败"),
|
||||
QUERY_DEPTH_EXCEEDED("knowledge_graph.0007", "查询深度超出限制"),
|
||||
MAX_NODES_EXCEEDED("knowledge_graph.0008", "查询结果节点数超出限制");
|
||||
MAX_NODES_EXCEEDED("knowledge_graph.0008", "查询结果节点数超出限制"),
|
||||
SYNC_FAILED("knowledge_graph.0009", "数据同步失败");
|
||||
|
||||
private final String code;
|
||||
private final String message;
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
package com.datamate.knowledgegraph.infrastructure.neo4j;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.boot.ApplicationArguments;
|
||||
import org.springframework.boot.ApplicationRunner;
|
||||
import org.springframework.core.annotation.Order;
|
||||
import org.springframework.data.neo4j.core.Neo4jClient;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 图谱 Schema 初始化器。
|
||||
* <p>
|
||||
* 应用启动时自动创建 Neo4j 索引和约束。
|
||||
* 所有语句使用 {@code IF NOT EXISTS},保证幂等性。
|
||||
* <p>
|
||||
* 对应 {@code docs/knowledge-graph/schema/schema.cypher} 中的第 1-3 部分。
|
||||
*/
|
||||
@Component
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor
|
||||
@Order(1)
|
||||
public class GraphInitializer implements ApplicationRunner {
|
||||
|
||||
private final Neo4jClient neo4jClient;
|
||||
private final KnowledgeGraphProperties properties;
|
||||
|
||||
/**
|
||||
* 需要在启动时执行的 Cypher 语句。
|
||||
* 每条语句必须独立执行(Neo4j 不支持多条 DDL 在同一事务中)。
|
||||
*/
|
||||
private static final List<String> SCHEMA_STATEMENTS = List.of(
|
||||
// 约束(自动创建对应索引)
|
||||
"CREATE CONSTRAINT entity_id_unique IF NOT EXISTS FOR (n:Entity) REQUIRE n.id IS UNIQUE",
|
||||
|
||||
// 同步 upsert 复合唯一约束:防止并发写入产生重复实体
|
||||
"CREATE CONSTRAINT entity_sync_unique IF NOT EXISTS " +
|
||||
"FOR (n:Entity) REQUIRE (n.graph_id, n.source_id, n.type) IS UNIQUE",
|
||||
|
||||
// 单字段索引
|
||||
"CREATE INDEX entity_graph_id IF NOT EXISTS FOR (n:Entity) ON (n.graph_id)",
|
||||
"CREATE INDEX entity_type IF NOT EXISTS FOR (n:Entity) ON (n.type)",
|
||||
"CREATE INDEX entity_name IF NOT EXISTS FOR (n:Entity) ON (n.name)",
|
||||
"CREATE INDEX entity_source_id IF NOT EXISTS FOR (n:Entity) ON (n.source_id)",
|
||||
"CREATE INDEX entity_created_at IF NOT EXISTS FOR (n:Entity) ON (n.created_at)",
|
||||
|
||||
// 复合索引
|
||||
"CREATE INDEX entity_graph_id_type IF NOT EXISTS FOR (n:Entity) ON (n.graph_id, n.type)",
|
||||
"CREATE INDEX entity_graph_id_id IF NOT EXISTS FOR (n:Entity) ON (n.graph_id, n.id)",
|
||||
"CREATE INDEX entity_graph_id_source_id IF NOT EXISTS FOR (n:Entity) ON (n.graph_id, n.source_id)",
|
||||
|
||||
// 全文索引
|
||||
"CREATE FULLTEXT INDEX entity_fulltext IF NOT EXISTS FOR (n:Entity) ON EACH [n.name, n.description]"
|
||||
);
|
||||
|
||||
@Override
|
||||
public void run(ApplicationArguments args) {
|
||||
if (!properties.getSync().isAutoInitSchema()) {
|
||||
log.info("Schema auto-init is disabled, skipping");
|
||||
return;
|
||||
}
|
||||
|
||||
log.info("Initializing Neo4j schema: {} statements to execute", SCHEMA_STATEMENTS.size());
|
||||
|
||||
int succeeded = 0;
|
||||
int failed = 0;
|
||||
|
||||
for (String statement : SCHEMA_STATEMENTS) {
|
||||
try {
|
||||
neo4jClient.query(statement).run();
|
||||
succeeded++;
|
||||
log.debug("Schema statement executed: {}", truncate(statement));
|
||||
} catch (Exception e) {
|
||||
failed++;
|
||||
// 约束/索引可能已存在(不同于 IF NOT EXISTS 的实现版本),
|
||||
// 记录警告但不中断启动。
|
||||
log.warn("Schema statement failed (may already exist): {} — {}",
|
||||
truncate(statement), e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
log.info("Neo4j schema initialization completed: succeeded={}, failed={}", succeeded, failed);
|
||||
}
|
||||
|
||||
private static String truncate(String s) {
|
||||
return s.length() <= 100 ? s : s.substring(0, 97) + "...";
|
||||
}
|
||||
}
|
||||
@@ -17,4 +17,32 @@ public class KnowledgeGraphProperties {
|
||||
|
||||
/** Batch size used for bulk imports. */
private int importBatchSize = 100;

/** Settings for the MySQL → Neo4j sync. */
private Sync sync = new Sync();

/** Nested sync configuration; field names are the config-binding keys. */
@Data
public static class Sync {

    /** Base URL of the data-management service. */
    private String dataManagementUrl = "http://localhost:8080";

    /** Number of records pulled per page during sync. */
    private int pageSize = 200;

    /** HTTP connect timeout, in milliseconds. */
    private int connectTimeout = 5000;

    /** HTTP read timeout, in milliseconds. */
    private int readTimeout = 30000;

    /** Maximum number of retries after a failed call. */
    private int maxRetries = 3;

    /** Delay between retries, in milliseconds. */
    private long retryInterval = 1000;

    /** Whether to auto-initialize the Neo4j schema on startup. */
    private boolean autoInitSchema = true;
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
package com.datamate.knowledgegraph.interfaces.dto;
|
||||
|
||||
import com.datamate.knowledgegraph.domain.model.SyncResult;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
/**
|
||||
* 同步结果视图对象。
|
||||
* <p>
|
||||
* 不暴露内部错误详情(errors 列表),仅返回错误计数和 syncId,
|
||||
* 前端可通过 syncId 向运维查询具体日志。
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class SyncResultVO {
|
||||
|
||||
private String syncId;
|
||||
private String syncType;
|
||||
private int created;
|
||||
private int updated;
|
||||
private int skipped;
|
||||
private int failed;
|
||||
private int purged;
|
||||
private int total;
|
||||
private long durationMillis;
|
||||
/** 错误数量(不暴露具体错误信息) */
|
||||
private int errorCount;
|
||||
private LocalDateTime startedAt;
|
||||
private LocalDateTime completedAt;
|
||||
|
||||
public static SyncResultVO from(SyncResult result) {
|
||||
return SyncResultVO.builder()
|
||||
.syncId(result.getSyncId())
|
||||
.syncType(result.getSyncType())
|
||||
.created(result.getCreated())
|
||||
.updated(result.getUpdated())
|
||||
.skipped(result.getSkipped())
|
||||
.failed(result.getFailed())
|
||||
.purged(result.getPurged())
|
||||
.total(result.total())
|
||||
.durationMillis(result.durationMillis())
|
||||
.errorCount(result.getErrors() != null ? result.getErrors().size() : 0)
|
||||
.startedAt(result.getStartedAt())
|
||||
.completedAt(result.getCompletedAt())
|
||||
.build();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,102 @@
|
||||
package com.datamate.knowledgegraph.interfaces.rest;
|
||||
|
||||
import com.datamate.knowledgegraph.application.GraphSyncService;
|
||||
import com.datamate.knowledgegraph.domain.model.SyncResult;
|
||||
import com.datamate.knowledgegraph.interfaces.dto.SyncResultVO;
|
||||
import jakarta.validation.constraints.Pattern;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.validation.annotation.Validated;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 知识图谱数据同步 API。
|
||||
* <p>
|
||||
* 提供手动触发 MySQL → Neo4j 同步的 REST 端点。
|
||||
* 生产环境中也可通过定时任务自动触发。
|
||||
*/
|
||||
@RestController
|
||||
@RequestMapping("/knowledge-graph/{graphId}/sync")
|
||||
@RequiredArgsConstructor
|
||||
@Validated
|
||||
public class GraphSyncController {
|
||||
|
||||
private static final String UUID_REGEX =
|
||||
"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$";
|
||||
|
||||
private final GraphSyncService syncService;
|
||||
|
||||
/**
|
||||
* 全量同步:拉取所有实体并构建关系。
|
||||
*/
|
||||
@PostMapping("/full")
|
||||
public List<SyncResultVO> syncAll(
|
||||
@PathVariable @Pattern(regexp = UUID_REGEX, message = "graphId 格式无效") String graphId) {
|
||||
List<SyncResult> results = syncService.syncAll(graphId);
|
||||
return results.stream().map(SyncResultVO::from).toList();
|
||||
}
|
||||
|
||||
/**
|
||||
* 同步数据集实体。
|
||||
*/
|
||||
@PostMapping("/datasets")
|
||||
public SyncResultVO syncDatasets(
|
||||
@PathVariable @Pattern(regexp = UUID_REGEX, message = "graphId 格式无效") String graphId) {
|
||||
return SyncResultVO.from(syncService.syncDatasets(graphId));
|
||||
}
|
||||
|
||||
/**
|
||||
* 同步字段实体。
|
||||
*/
|
||||
@PostMapping("/fields")
|
||||
public SyncResultVO syncFields(
|
||||
@PathVariable @Pattern(regexp = UUID_REGEX, message = "graphId 格式无效") String graphId) {
|
||||
return SyncResultVO.from(syncService.syncFields(graphId));
|
||||
}
|
||||
|
||||
/**
|
||||
* 同步用户实体。
|
||||
*/
|
||||
@PostMapping("/users")
|
||||
public SyncResultVO syncUsers(
|
||||
@PathVariable @Pattern(regexp = UUID_REGEX, message = "graphId 格式无效") String graphId) {
|
||||
return SyncResultVO.from(syncService.syncUsers(graphId));
|
||||
}
|
||||
|
||||
/**
|
||||
* 同步组织实体。
|
||||
*/
|
||||
@PostMapping("/orgs")
|
||||
public SyncResultVO syncOrgs(
|
||||
@PathVariable @Pattern(regexp = UUID_REGEX, message = "graphId 格式无效") String graphId) {
|
||||
return SyncResultVO.from(syncService.syncOrgs(graphId));
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建 HAS_FIELD 关系。
|
||||
*/
|
||||
@PostMapping("/relations/has-field")
|
||||
public SyncResultVO buildHasFieldRelations(
|
||||
@PathVariable @Pattern(regexp = UUID_REGEX, message = "graphId 格式无效") String graphId) {
|
||||
return SyncResultVO.from(syncService.buildHasFieldRelations(graphId));
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建 DERIVED_FROM 关系。
|
||||
*/
|
||||
@PostMapping("/relations/derived-from")
|
||||
public SyncResultVO buildDerivedFromRelations(
|
||||
@PathVariable @Pattern(regexp = UUID_REGEX, message = "graphId 格式无效") String graphId) {
|
||||
return SyncResultVO.from(syncService.buildDerivedFromRelations(graphId));
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建 BELONGS_TO 关系。
|
||||
*/
|
||||
@PostMapping("/relations/belongs-to")
|
||||
public SyncResultVO buildBelongsToRelations(
|
||||
@PathVariable @Pattern(regexp = UUID_REGEX, message = "graphId 格式无效") String graphId) {
|
||||
return SyncResultVO.from(syncService.buildBelongsToRelations(graphId));
|
||||
}
|
||||
}
|
||||
@@ -23,3 +23,19 @@ datamate:
|
||||
max-nodes-per-query: ${KG_MAX_NODES:500}
|
||||
# 批量导入批次大小
|
||||
import-batch-size: ${KG_IMPORT_BATCH_SIZE:100}
|
||||
# MySQL → Neo4j 同步配置
|
||||
sync:
|
||||
# 数据管理服务地址
|
||||
data-management-url: ${DATA_MANAGEMENT_URL:http://localhost:8080}
|
||||
# 每页拉取数量
|
||||
page-size: ${KG_SYNC_PAGE_SIZE:200}
|
||||
# HTTP 连接超时(毫秒)
|
||||
connect-timeout: ${KG_SYNC_CONNECT_TIMEOUT:5000}
|
||||
# HTTP 读取超时(毫秒)
|
||||
read-timeout: ${KG_SYNC_READ_TIMEOUT:30000}
|
||||
# 失败时最大重试次数
|
||||
max-retries: ${KG_SYNC_MAX_RETRIES:3}
|
||||
# 重试间隔(毫秒)
|
||||
retry-interval: ${KG_SYNC_RETRY_INTERVAL:1000}
|
||||
# 是否在启动时自动初始化 Schema
|
||||
auto-init-schema: ${KG_AUTO_INIT_SCHEMA:true}
|
||||
|
||||
Reference in New Issue
Block a user