init datamate

This commit is contained in:
Dallas98
2025-10-21 23:00:48 +08:00
commit 1c97afed7d
692 changed files with 135442 additions and 0 deletions

View File

@@ -0,0 +1,169 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Main launcher module: aggregates every service module into one runnable Spring Boot jar. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
                             http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>com.datamate</groupId>
        <artifactId>data-mate-platform</artifactId>
        <version>1.0.0-SNAPSHOT</version>
        <relativePath>../../pom.xml</relativePath>
    </parent>

    <artifactId>main-application</artifactId>
    <packaging>jar</packaging>
    <name>Data Mate Platform - Main Application</name>
    <description>主启动应用,集成所有服务模块</description>

    <dependencies>
        <!-- Spring Boot starters (versions are not declared here; they must be managed by the parent) -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-actuator</artifactId>
        </dependency>
        <dependency>
            <groupId>jakarta.persistence</groupId>
            <artifactId>jakarta.persistence-api</artifactId>
        </dependency>

        <!-- Shared library dependencies -->
        <dependency>
            <groupId>com.datamate</groupId>
            <artifactId>domain-common</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>com.datamate</groupId>
            <artifactId>security-common</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-compress</artifactId>
            <version>1.26.1</version>
        </dependency>

        <!-- Core service dependencies -->
        <dependency>
            <groupId>com.datamate</groupId>
            <artifactId>data-management-service</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>com.datamate</groupId>
            <artifactId>data-collection-service</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>com.datamate</groupId>
            <artifactId>operator-market-service</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>com.datamate</groupId>
            <artifactId>data-cleaning-service</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>com.datamate</groupId>
            <artifactId>data-synthesis-service</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>com.datamate</groupId>
            <artifactId>data-annotation-service</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>com.datamate</groupId>
            <artifactId>data-evaluation-service</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>com.datamate</groupId>
            <artifactId>pipeline-orchestration-service</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>com.datamate</groupId>
            <artifactId>execution-engine-service</artifactId>
            <version>${project.version}</version>
        </dependency>

        <!-- RAG service dependencies -->
        <dependency>
            <groupId>com.datamate</groupId>
            <artifactId>rag-indexer-service</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>com.datamate</groupId>
            <artifactId>rag-query-service</artifactId>
            <version>${project.version}</version>
        </dependency>

        <!-- MyBatis dependencies -->
        <dependency>
            <groupId>com.baomidou</groupId>
            <artifactId>mybatis-plus-spring-boot3-starter</artifactId>
        </dependency>

        <!-- Database driver.
             The legacy mysql:mysql-connector-java coordinate is only a relocation stub;
             com.mysql:mysql-connector-j is the current coordinate for the same driver. -->
        <dependency>
            <groupId>com.mysql</groupId>
            <artifactId>mysql-connector-j</artifactId>
            <version>8.0.33</version>
            <scope>runtime</scope>
        </dependency>

        <!-- Test dependencies -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.11.0</version>
                <configuration>
                    <source>${maven.compiler.source}</source>
                    <target>${maven.compiler.target}</target>
                    <compilerArgs>
                        <!-- Keep method parameter names in the class files. -->
                        <arg>-parameters</arg>
                    </compilerArgs>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <version>${spring-boot.version}</version>
                <configuration>
                    <!-- NOTE(review): recent spring-boot-maven-plugin releases document finalName as
                         read-only; if the build warns about it, set build/finalName instead.
                         Confirm against the plugin version in use. -->
                    <finalName>data-mate</finalName>
                    <mainClass>com.datamate.main.DataMatePlatformApplication</mainClass>
                </configuration>
                <executions>
                    <execution>
                        <goals>
                            <goal>repackage</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

View File

@@ -0,0 +1,49 @@
package com.datamate.main;
import org.mybatis.spring.annotation.MapperScan;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.transaction.annotation.EnableTransactionManagement;
/**
 * Main application of the data engine platform.
 *
 * <p>Startup class that aggregates the business service JARs into a single Spring Boot
 * application. Component scanning is limited to the listed {@code com.datamate.*} packages,
 * and MyBatis mapper interfaces are registered from the listed mapper packages.
 *
 * <p>The scan packages are declared through {@code scanBasePackages} rather than a separate
 * {@code @ComponentScan}: a directly declared {@code @ComponentScan} replaces the one carried
 * by {@code @SpringBootApplication} together with its exclude filters.
 *
 * @author Data Mate Team
 * @version 1.0.0
 */
@SpringBootApplication(scanBasePackages = {
        "com.datamate.main",
        "com.datamate.datamanagement",
        "com.datamate.collection",
        "com.datamate.operator",
        "com.datamate.cleaning",
        "com.datamate.synthesis",
        "com.datamate.annotation",
        "com.datamate.evaluation",
        "com.datamate.pipeline",
        "com.datamate.execution",
        "com.datamate.rag",
        "com.datamate.shared",
        "com.datamate.common"
})
@MapperScan(basePackages = {
        "com.datamate.collection.infrastructure.persistence.mapper",
        "com.datamate.datamanagement.infrastructure.persistence.mapper",
        "com.datamate.operator.infrastructure.persistence.mapper",
        "com.datamate.cleaning.infrastructure.persistence.mapper",
        "com.datamate.common.infrastructure.mapper"
})
@EnableTransactionManagement
@EnableAsync
@EnableScheduling
public class DataMatePlatformApplication {

    /**
     * Boots the Spring application context.
     *
     * @param args command-line arguments forwarded to {@link SpringApplication#run}
     */
    public static void main(String[] args) {
        SpringApplication.run(DataMatePlatformApplication.class, args);
    }
}

View File

@@ -0,0 +1,26 @@
package com.datamate.main.config;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.security.config.annotation.web.builders.HttpSecurity;
import org.springframework.security.config.annotation.web.configuration.EnableWebSecurity;
import org.springframework.security.web.SecurityFilterChain;
/**
 * Security configuration that currently switches off all authentication.
 *
 * <p>Meant for the development stage only; authentication has to be enabled
 * before this is used in a production environment.
 */
@Configuration
@EnableWebSecurity
public class SecurityConfig {

    /**
     * Builds a filter chain with CSRF protection disabled and every request permitted.
     *
     * @param http the {@link HttpSecurity} builder to configure
     * @return the filter chain produced by {@code http.build()}
     * @throws Exception if configuring or building the chain fails
     */
    @Bean
    public SecurityFilterChain filterChain(HttpSecurity http) throws Exception {
        http.csrf(csrfConfigurer -> csrfConfigurer.disable());

        // Allow every request through without authentication.
        http.authorizeHttpRequests(registry -> registry.anyRequest().permitAll());

        return http.build();
    }
}

View File

@@ -0,0 +1,179 @@
# Data engine platform - main application configuration
spring:
  application:
    name: data-mate-platform
  # Temporarily exclude the Spring Security auto-configuration (development stage only).
  # NOTE(review): SecurityConfig in this module still declares @EnableWebSecurity and its own
  # filter chain — confirm that both are intended to coexist.
  autoconfigure:
    exclude:
      - org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration
      - org.springframework.boot.autoconfigure.security.servlet.UserDetailsServiceAutoConfiguration
  # Data source configuration
  datasource:
    driver-class-name: com.mysql.cj.jdbc.Driver
    url: jdbc:mysql://mysql:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true
    username: ${DB_USERNAME:root}
    # NOTE(review): the fallback below is a real-looking credential committed to the repository;
    # consider requiring DB_PASSWORD to be supplied by the environment.
    password: ${DB_PASSWORD:Huawei@123}
    hikari:
      maximum-pool-size: 20
      minimum-idle: 5
      connection-timeout: 30000
      idle-timeout: 600000
      max-lifetime: 1800000
  # Elasticsearch configuration
  elasticsearch:
    uris: ${ES_URIS:http://localhost:9200}
    username: ${ES_USERNAME:}
    password: ${ES_PASSWORD:}
    connection-timeout: 10s
    socket-timeout: 30s
  # Jackson configuration
  jackson:
    time-zone: Asia/Shanghai
    date-format: yyyy-MM-dd HH:mm:ss
    serialization:
      write-dates-as-timestamps: false
    deserialization:
      fail-on-unknown-properties: false
  # File upload configuration
  servlet:
    multipart:
      max-file-size: 100MB
      max-request-size: 100MB
  # Task execution / scheduling configuration
  task:
    execution:
      pool:
        core-size: ${TASK_EXECUTION_CORE_SIZE:10}
        max-size: ${TASK_EXECUTION_MAX_SIZE:20}
        queue-capacity: ${TASK_EXECUTION_QUEUE_CAPACITY:100}
        keep-alive: ${TASK_EXECUTION_KEEP_ALIVE:60s}
    scheduling:
      pool:
        size: ${TASK_SCHEDULING_POOL_SIZE:5}
  # Additional module configuration files pulled in from the classpath
  config:
    import:
      - classpath:config/application-datacollection.yml
      - classpath:config/application-datamanagement.yml
# MyBatis configuration (must be at the top level, not under spring)
mybatis-plus:
  configuration:
    map-underscore-to-camel-case: true
    default-fetch-size: 100
    default-statement-timeout: 30
    use-generated-keys: true
    cache-enabled: true
    lazy-loading-enabled: false
    multiple-result-sets-enabled: true
    use-column-label: true
    auto-mapping-behavior: partial
    auto-mapping-unknown-column-behavior: none
    default-executor-type: simple
    call-setters-on-nulls: false
    return-instance-for-empty-row: false
    log-impl: org.apache.ibatis.logging.slf4j.Slf4jImpl
  mapper-locations:
    - classpath*:mappers/**/*.xml
# Application (server) configuration
server:
  port: ${SERVER_PORT:8080}
  servlet:
    context-path: /api
    encoding:
      charset: UTF-8
      enabled: true
      force: true
# Logging configuration
logging:
  config: classpath:log4j2.xml
# Actuator configuration
management:
  endpoints:
    web:
      exposure:
        include: health,info,metrics,prometheus
  endpoint:
    health:
      show-details: when-authorized
  health:
    elasticsearch:
      enabled: false # disable the Elasticsearch health check
# Platform configuration
datamate:
  # JWT configuration
  jwt:
    # NOTE(review): the fallback signing secret is committed to the repository; any deployment
    # that does not set JWT_SECRET will use this value.
    secret: ${JWT_SECRET:dataMateSecretKey2024ForJWTTokenGeneration}
    expiration: ${JWT_EXPIRATION:86400} # 24 hours, in seconds
    header: Authorization
    prefix: "Bearer "
  # File storage configuration
  storage:
    type: ${STORAGE_TYPE:local} # local, minio, s3
    local:
      base-path: ${STORAGE_LOCAL_PATH:./data/storage}
    minio:
      endpoint: ${MINIO_ENDPOINT:http://localhost:9000}
      # NOTE(review): fallback access/secret keys are fixed values checked into the repo.
      access-key: ${MINIO_ACCESS_KEY:minioadmin}
      secret-key: ${MINIO_SECRET_KEY:minioadmin}
      bucket-name: ${MINIO_BUCKET:data-mate}
  # Ray executor configuration
  ray:
    enabled: ${RAY_ENABLED:false}
    address: ${RAY_ADDRESS:ray://localhost:10001}
    runtime-env:
      working-dir: ${RAY_WORKING_DIR:./runtime/python-executor}
      pip-packages:
        - "ray[default]==2.7.0"
        - "pandas"
        - "numpy"
        - "data-juicer"
  # Data collection service configuration (can be layered on by imported module configs)
  data-collection: {}
  # Operator market configuration
  operator-market:
    repository-path: ${OPERATOR_REPO_PATH:./runtime/operators}
    registry-url: ${OPERATOR_REGISTRY_URL:}
    max-upload-size: ${OPERATOR_MAX_UPLOAD_SIZE:50MB}
  # Data processing configuration
  data-processing:
    max-file-size: ${MAX_FILE_SIZE:1GB}
    temp-dir: ${TEMP_DIR:./data/temp}
    batch-size: ${BATCH_SIZE:1000}
  # Annotation configuration
  annotation:
    auto-annotation:
      enabled: ${AUTO_ANNOTATION_ENABLED:true}
      model-endpoint: ${ANNOTATION_MODEL_ENDPOINT:}
    quality-control:
      enabled: ${QC_ENABLED:true}
      threshold: ${QC_THRESHOLD:0.8}
  # RAG configuration
  rag:
    embedding:
      model: ${RAG_EMBEDDING_MODEL:text-embedding-ada-002}
      api-key: ${RAG_API_KEY:}
      dimension: ${RAG_DIMENSION:1536}
    chunk:
      size: ${RAG_CHUNK_SIZE:512}
      overlap: ${RAG_CHUNK_OVERLAP:50}
    retrieval:
      top-k: ${RAG_TOP_K:5}
      score-threshold: ${RAG_SCORE_THRESHOLD:0.7}

View File

@@ -0,0 +1,23 @@
# Data collection module settings; each value can be overridden through the named
# environment variable, otherwise the default after the colon applies.
datamate:
  data-collection:
    # DataX configuration
    datax:
      home-path: ${DATAX_HOME:/opt/datax}
      python-path: ${DATAX_PYTHON_PATH:python3}
      job-config-path: ${DATAX_JOB_PATH:./data/temp/datax/jobs}
      log-path: ${DATAX_LOG_PATH:./logs/datax}
      # NOTE(review): unit is not stated here (presumably MB) — confirm against the consumer
      max-memory: ${DATAX_MAX_MEMORY:2048}
      channel-count: ${DATAX_CHANNEL_COUNT:5}
    # Execution configuration
    execution:
      max-concurrent-tasks: ${DATA_COLLECTION_MAX_CONCURRENT_TASKS:10}
      task-timeout-minutes: ${DATA_COLLECTION_TASK_TIMEOUT:120}
      retry-count: ${DATA_COLLECTION_RETRY_COUNT:3}
      retry-interval-seconds: ${DATA_COLLECTION_RETRY_INTERVAL:30}
    # Monitoring configuration
    monitoring:
      status-check-interval-seconds: ${DATA_COLLECTION_STATUS_CHECK_INTERVAL:30}
      log-retention-days: ${DATA_COLLECTION_LOG_RETENTION:30}
      enable-metrics: ${DATA_COLLECTION_ENABLE_METRICS:true}

View File

@@ -0,0 +1,11 @@
# Data management module settings.
datamate:
  datamanagement:
    # Upload location and size limits (sizes are in bytes, per the inline comments)
    file-storage:
      upload-dir: ${FILE_UPLOAD_DIR:./uploads}
      max-file-size: 10485760 # 10MB
      max-request-size: 52428800 # 50MB
    # NOTE(review): units are not stated here (ttl presumably seconds, max-size presumably
    # an entry count) — confirm against the consumer
    cache:
      ttl: 3600
      max-size: 1000
# MyBatis is configured centrally in main-application (mapper-locations & aliases)
# to avoid list overriding issues when importing multiple module configs.

View File

@@ -0,0 +1,42 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Log4j2 configuration: writes to the console and to a rolling file under LOG_PATH.
     monitorInterval makes Log4j re-check this file for changes every 30 seconds. -->
<Configuration status="WARN" monitorInterval="30">
    <Properties>
        <Property name="LOG_PATH">/var/log/data-mate/backend</Property>
        <Property name="LOG_PATTERN">%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n</Property>
        <Property name="MAX_FILE_SIZE">100MB</Property>
        <!-- Caps the per-day %i counter and is also used as the archive retention in days. -->
        <Property name="MAX_HISTORY">30</Property>
        <Property name="LOG_LEVEL">INFO</Property>
        <Property name="SQL_LOG_LEVEL">WARN</Property>
    </Properties>

    <Appenders>
        <!-- Console output -->
        <Console name="Console" target="SYSTEM_OUT">
            <PatternLayout pattern="${LOG_PATTERN}" />
        </Console>

        <!-- Rolling file output: rolls once per day and whenever the file exceeds MAX_FILE_SIZE -->
        <RollingFile name="File" fileName="${LOG_PATH}/app.log" filePattern="${LOG_PATH}/app-%d{yyyy-MM-dd}-%i.log">
            <PatternLayout pattern="${LOG_PATTERN}" />
            <Policies>
                <TimeBasedTriggeringPolicy interval="1" modulate="true" />
                <SizeBasedTriggeringPolicy size="${MAX_FILE_SIZE}" />
            </Policies>
            <DefaultRolloverStrategy max="${MAX_HISTORY}">
                <!-- max only bounds the %i counter inside a single day, so without an age-based
                     cleanup the dated archives would accumulate indefinitely. Only rolled files
                     (app-*.log) are matched; the active app.log is never deleted. -->
                <Delete basePath="${LOG_PATH}" maxDepth="1">
                    <IfFileName glob="app-*.log" />
                    <IfLastModified age="${MAX_HISTORY}d" />
                </Delete>
            </DefaultRolloverStrategy>
        </RollingFile>
    </Appenders>

    <Loggers>
        <!-- Custom loggers (no appenders of their own; events propagate to the root logger) -->
        <Logger name="com.datamate" level="${LOG_LEVEL}" />
        <Logger name="org.springframework.security" level="${LOG_LEVEL}" />
        <Logger name="org.hibernate.SQL" level="${SQL_LOG_LEVEL}" />
        <Logger name="org.hibernate.type.descriptor.sql.BasicBinder" level="${SQL_LOG_LEVEL}" />

        <!-- Root logger -->
        <Root level="INFO">
            <AppenderRef ref="Console" />
            <AppenderRef ref="File" />
        </Root>
    </Loggers>
</Configuration>