# Repository: DataMate (fork)
# Last change:
#   * Enhance CleaningTaskService to track cleaning process progress and update ExecutorType to DATAMATE
#   * Refactor project to use 'datamate' naming convention for services and configurations
# File info: 182 lines, 4.9 KiB, YAML
# Data engine platform - main application configuration
spring:
  application:
    name: data-mate-platform

  # Temporarily exclude Spring Security auto-configuration (used during development)
  autoconfigure:
    exclude:
      - org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration
      - org.springframework.boot.autoconfigure.security.servlet.UserDetailsServiceAutoConfiguration

  # Data source configuration
  datasource:
    driver-class-name: com.mysql.cj.jdbc.Driver
    url: jdbc:mysql://datamate-database:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true
    username: ${DB_USERNAME:root}
    # NOTE(review): a real-looking password is committed as the fallback value.
    # Always set DB_PASSWORD in deployed environments; consider dropping the default.
    password: ${DB_PASSWORD:Huawei@123}
    hikari:
      maximum-pool-size: 20
      minimum-idle: 5
      connection-timeout: 30000
      idle-timeout: 600000
      max-lifetime: 1800000

  # Elasticsearch configuration
  elasticsearch:
    uris: ${ES_URIS:http://localhost:9200}
    # Both credentials fall back to an empty value when the env vars are unset.
    username: ${ES_USERNAME:}
    password: ${ES_PASSWORD:}
    connection-timeout: 10s
    socket-timeout: 30s

  # Jackson configuration
  jackson:
    time-zone: Asia/Shanghai
    date-format: yyyy-MM-dd HH:mm:ss
    serialization:
      write-dates-as-timestamps: false
    deserialization:
      fail-on-unknown-properties: false

  # File upload configuration
  servlet:
    multipart:
      max-file-size: 100MB
      max-request-size: 100MB

  # Task scheduling configuration (every value can be overridden via env var)
  task:
    execution:
      pool:
        core-size: ${TASK_EXECUTION_CORE_SIZE:10}
        max-size: ${TASK_EXECUTION_MAX_SIZE:20}
        queue-capacity: ${TASK_EXECUTION_QUEUE_CAPACITY:100}
        keep-alive: ${TASK_EXECUTION_KEEP_ALIVE:60s}
    scheduling:
      pool:
        size: ${TASK_SCHEDULING_POOL_SIZE:5}

  # Additional module configuration files pulled in from the classpath
  config:
    import:
      - classpath:config/application-datacollection.yml
      - classpath:config/application-datamanagement.yml

# MyBatis configuration (must live at the top level, not under `spring`)
mybatis-plus:
  configuration:
    map-underscore-to-camel-case: true
    default-fetch-size: 100
    default-statement-timeout: 30
    use-generated-keys: true
    cache-enabled: true
    lazy-loading-enabled: false
    multiple-result-sets-enabled: true
    use-column-label: true
    auto-mapping-behavior: partial
    auto-mapping-unknown-column-behavior: none
    default-executor-type: simple
    call-setters-on-nulls: false
    return-instance-for-empty-row: false
    log-impl: org.apache.ibatis.logging.slf4j.Slf4jImpl
  mapper-locations:
    - classpath*:mappers/**/*.xml
  # NOTE(review): written as a list here; confirm the consuming property accepts
  # a list rather than a single comma-separated string.
  type-aliases-package:
    - com.datamate.collection.domain.model
    - com.datamate.datamanagement.domain.model.dataset

# Application (embedded server) configuration
server:
  port: ${SERVER_PORT:8080}
  servlet:
    context-path: /api
    encoding:
      charset: UTF-8
      enabled: true
      force: true

# Logging configuration: points at an external Log4j2 file on the filesystem
logging:
  config: file:/opt/backend/log4j2.xml

# Actuator configuration
management:
  endpoints:
    web:
      exposure:
        include: health,info,metrics,prometheus
  endpoint:
    health:
      show-details: when-authorized
  health:
    elasticsearch:
      enabled: false  # Disable the Elasticsearch health check

# Platform configuration
# NOTE(review): the nesting below was reconstructed from the key names and
# section comments; confirm it against the classes that bind `datamate.*`.
datamate:
  # JWT configuration
  jwt:
    # NOTE(review): a fixed fallback signing secret is committed here.
    # Always set JWT_SECRET in deployed environments.
    secret: ${JWT_SECRET:dataMateSecretKey2024ForJWTTokenGeneration}
    expiration: ${JWT_EXPIRATION:86400}  # 24 hours, in seconds
    header: Authorization
    prefix: "Bearer "

  # File storage configuration
  storage:
    type: ${STORAGE_TYPE:local}  # local, minio, s3
    local:
      base-path: ${STORAGE_LOCAL_PATH:./data/storage}
    minio:
      endpoint: ${MINIO_ENDPOINT:http://localhost:9000}
      # NOTE(review): fallback credentials are committed; override both in deployment.
      access-key: ${MINIO_ACCESS_KEY:minioadmin}
      secret-key: ${MINIO_SECRET_KEY:minioadmin}
      bucket-name: ${MINIO_BUCKET:data-mate}

  # Ray executor configuration
  ray:
    enabled: ${RAY_ENABLED:false}
    address: ${RAY_ADDRESS:ray://localhost:10001}
    runtime-env:
      working-dir: ${RAY_WORKING_DIR:./runtime/python-executor}
      pip-packages:
        - "ray[default]==2.7.0"
        - "pandas"
        - "numpy"
        - "data-juicer"

  # Data collection service configuration (may be overlaid by module imports)
  data-collection: {}

  # Operator marketplace configuration
  operator-market:
    repository-path: ${OPERATOR_REPO_PATH:./runtime/operators}
    registry-url: ${OPERATOR_REGISTRY_URL:}
    max-upload-size: ${OPERATOR_MAX_UPLOAD_SIZE:50MB}

  # Data processing configuration
  data-processing:
    max-file-size: ${MAX_FILE_SIZE:1GB}
    temp-dir: ${TEMP_DIR:./data/temp}
    batch-size: ${BATCH_SIZE:1000}

  # Annotation configuration
  annotation:
    auto-annotation:
      enabled: ${AUTO_ANNOTATION_ENABLED:true}
      model-endpoint: ${ANNOTATION_MODEL_ENDPOINT:}
    quality-control:
      enabled: ${QC_ENABLED:true}
      threshold: ${QC_THRESHOLD:0.8}

  # RAG configuration
  rag:
    embedding:
      model: ${RAG_EMBEDDING_MODEL:text-embedding-ada-002}
      api-key: ${RAG_API_KEY:}
      dimension: ${RAG_DIMENSION:1536}
    chunk:
      size: ${RAG_CHUNK_SIZE:512}
      overlap: ${RAG_CHUNK_OVERLAP:50}
    retrieval:
      top-k: ${RAG_TOP_K:5}
      score-threshold: ${RAG_SCORE_THRESHOLD:0.7}