# File: DataMate/editions/enterprise/config/application.yml
#
# Viewer metadata at time of capture: 182 lines, 4.9 KiB, YAML

# Data engine platform - main application configuration
spring:
  application:
    name: data-mate-platform

  # Temporarily exclude Spring Security auto-configuration (for use during development)
  autoconfigure:
    exclude:
      - org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration
      - org.springframework.boot.autoconfigure.security.servlet.UserDetailsServiceAutoConfiguration

  # Datasource configuration
  datasource:
    driver-class-name: com.mysql.cj.jdbc.Driver
    url: jdbc:mysql://mysql:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true
    # NOTE(review): the fallbacks below (root / password) apply whenever
    # DB_USERNAME / DB_PASSWORD are unset; set both variables explicitly in
    # any shared or production environment.
    username: ${DB_USERNAME:root}
    password: ${DB_PASSWORD:password}
    hikari:
      maximum-pool-size: 20
      minimum-idle: 5
      # The three Hikari timeouts below are in milliseconds.
      connection-timeout: 30000
      idle-timeout: 600000
      max-lifetime: 1800000

  # Elasticsearch configuration
  elasticsearch:
    uris: ${ES_URIS:http://localhost:9200}
    # Empty fallbacks: no credentials are configured unless the variables are set.
    username: ${ES_USERNAME:}
    password: ${ES_PASSWORD:}
    connection-timeout: 10s
    socket-timeout: 30s

  # Jackson configuration
  jackson:
    time-zone: Asia/Shanghai
    date-format: 'yyyy-MM-dd HH:mm:ss'
    serialization:
      write-dates-as-timestamps: false
    deserialization:
      fail-on-unknown-properties: false

  # File upload configuration
  servlet:
    multipart:
      max-file-size: 100MB
      max-request-size: 100MB

  # Task scheduling configuration
  task:
    execution:
      pool:
        core-size: ${TASK_EXECUTION_CORE_SIZE:10}
        max-size: ${TASK_EXECUTION_MAX_SIZE:20}
        queue-capacity: ${TASK_EXECUTION_QUEUE_CAPACITY:100}
        keep-alive: ${TASK_EXECUTION_KEEP_ALIVE:60s}
    scheduling:
      pool:
        size: ${TASK_SCHEDULING_POOL_SIZE:5}

  # Additional configuration files imported from the classpath.
  config:
    import:
      - classpath:config/application-datacollection.yml
      - classpath:config/application-datamanagement.yml
# MyBatis configuration (must be at the top level, not under spring)
mybatis-plus:
  configuration:
    map-underscore-to-camel-case: true
    default-fetch-size: 100
    default-statement-timeout: 30
    use-generated-keys: true
    cache-enabled: true
    lazy-loading-enabled: false
    multiple-result-sets-enabled: true
    use-column-label: true
    auto-mapping-behavior: partial
    auto-mapping-unknown-column-behavior: none
    default-executor-type: simple
    call-setters-on-nulls: false
    return-instance-for-empty-row: false
    log-impl: org.apache.ibatis.logging.slf4j.Slf4jImpl
  mapper-locations:
    - classpath*:mappers/**/*.xml
  # NOTE(review): this value is written as a YAML list; confirm that the
  # MyBatis-Plus property it binds to accepts a list rather than a single
  # delimiter-separated string - TODO verify the aliases are registered.
  type-aliases-package:
    - com.datamate.collection.domain.model
    - com.datamate.datamanagement.domain.model.dataset
# Application (embedded server) configuration
server:
  port: ${SERVER_PORT:8080}
  servlet:
    # Every HTTP endpoint is served under this prefix.
    context-path: /api
    encoding:
      charset: UTF-8
      enabled: true
      force: true
# Logging configuration
logging:
  # Absolute filesystem path to the Log4j2 configuration file.
  config: file:/opt/backend/log4j2.xml
# Actuator configuration
management:
  endpoints:
    web:
      exposure:
        include: health,info,metrics,prometheus
  endpoint:
    health:
      show-details: when-authorized
  health:
    elasticsearch:
      enabled: false  # Disable the Elasticsearch health check
# Platform configuration
# NOTE(review): these are project-specific properties; the nesting below
# follows the section comments and key grouping - confirm it against the
# classes that bind the `datamate.*` prefix.
datamate:
  # JWT configuration
  jwt:
    # NOTE(review): the fallback secret is visible in this file; set
    # JWT_SECRET explicitly in any shared or production environment.
    secret: ${JWT_SECRET:dataMateSecretKey2024ForJWTTokenGeneration}
    expiration: ${JWT_EXPIRATION:86400}  # 24 hours, in seconds
    header: Authorization
    # Quoted so the trailing space is preserved.
    prefix: "Bearer "

  # File storage configuration
  storage:
    type: ${STORAGE_TYPE:local}  # local, minio, s3
    local:
      base-path: ${STORAGE_LOCAL_PATH:./data/storage}
    minio:
      endpoint: ${MINIO_ENDPOINT:http://localhost:9000}
      # NOTE(review): fallback credentials (minioadmin / minioadmin) apply
      # when MINIO_ACCESS_KEY / MINIO_SECRET_KEY are unset.
      access-key: ${MINIO_ACCESS_KEY:minioadmin}
      secret-key: ${MINIO_SECRET_KEY:minioadmin}
      bucket-name: ${MINIO_BUCKET:data-mate}

  # Ray executor configuration
  ray:
    enabled: ${RAY_ENABLED:false}
    address: ${RAY_ADDRESS:ray://localhost:10001}
    runtime-env:
      working-dir: ${RAY_WORKING_DIR:./runtime/python-executor}
      pip-packages:
        - "ray[default]==2.7.0"
        - "pandas"
        - "numpy"
        - "data-juicer"

  # Data collection service configuration (can be overlaid by module imports)
  data-collection: {}

  # Operator marketplace configuration
  operator-market:
    repository-path: ${OPERATOR_REPO_PATH:./runtime/operators}
    registry-url: ${OPERATOR_REGISTRY_URL:}
    max-upload-size: ${OPERATOR_MAX_UPLOAD_SIZE:50MB}

  # Data processing configuration
  data-processing:
    max-file-size: ${MAX_FILE_SIZE:1GB}
    temp-dir: ${TEMP_DIR:./data/temp}
    batch-size: ${BATCH_SIZE:1000}

  # Annotation configuration
  annotation:
    auto-annotation:
      enabled: ${AUTO_ANNOTATION_ENABLED:true}
      model-endpoint: ${ANNOTATION_MODEL_ENDPOINT:}
    quality-control:
      enabled: ${QC_ENABLED:true}
      threshold: ${QC_THRESHOLD:0.8}

  # RAG configuration
  rag:
    embedding:
      model: ${RAG_EMBEDDING_MODEL:text-embedding-ada-002}
      api-key: ${RAG_API_KEY:}
      dimension: ${RAG_DIMENSION:1536}
    chunk:
      size: ${RAG_CHUNK_SIZE:512}
      overlap: ${RAG_CHUNK_OVERLAP:50}
    retrieval:
      top-k: ${RAG_TOP_K:5}
      score-threshold: ${RAG_SCORE_THRESHOLD:0.7}