---
# Data engine platform - main application configuration
spring:
  application:
    name: datamate

  # Spring Security auto-configuration is temporarily excluded
  # (used during the development phase).
  autoconfigure:
    exclude:
      - org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration
      - org.springframework.boot.autoconfigure.security.servlet.UserDetailsServiceAutoConfiguration

  # Data source configuration
  # NOTE(review): the username/password fallbacks are plain-text defaults;
  # presumably for local development only - confirm DB_USERNAME/DB_PASSWORD
  # are always supplied in deployed environments.
  datasource:
    driver-class-name: com.mysql.cj.jdbc.Driver
    url: jdbc:mysql://mysql:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true
    username: ${DB_USERNAME:root}
    password: ${DB_PASSWORD:password}
    hikari:
      maximum-pool-size: 20
      minimum-idle: 5
      connection-timeout: 30000
      idle-timeout: 600000
      max-lifetime: 1800000

  # Elasticsearch configuration
  elasticsearch:
    uris: ${ES_URIS:http://localhost:9200}
    username: ${ES_USERNAME:}
    password: ${ES_PASSWORD:}
    connection-timeout: 10s
    socket-timeout: 30s

  # Jackson configuration
  jackson:
    time-zone: Asia/Shanghai
    date-format: yyyy-MM-dd HH:mm:ss
    serialization:
      write-dates-as-timestamps: false
    deserialization:
      fail-on-unknown-properties: false

  # File upload configuration
  servlet:
    multipart:
      max-file-size: 100MB
      max-request-size: 100MB

  # Task execution and scheduling configuration
  task:
    execution:
      pool:
        core-size: ${TASK_EXECUTION_CORE_SIZE:10}
        max-size: ${TASK_EXECUTION_MAX_SIZE:20}
        queue-capacity: ${TASK_EXECUTION_QUEUE_CAPACITY:100}
        keep-alive: ${TASK_EXECUTION_KEEP_ALIVE:60s}
    scheduling:
      pool:
        size: ${TASK_SCHEDULING_POOL_SIZE:5}

  # Additional per-module configuration files pulled in from the classpath
  config:
    import:
      - classpath:config/application-datacollection.yml
      - classpath:config/application-datamanagement.yml

# MyBatis configuration (must be at the top level, not under `spring`)
mybatis-plus:
  configuration:
    map-underscore-to-camel-case: true
    default-fetch-size: 100
    default-statement-timeout: 30
    use-generated-keys: true
    cache-enabled: true
    lazy-loading-enabled: false
    multiple-result-sets-enabled: true
    use-column-label: true
    auto-mapping-behavior: partial
    auto-mapping-unknown-column-behavior: none
    default-executor-type: simple
    call-setters-on-nulls: false
    return-instance-for-empty-row: false
    log-impl: org.apache.ibatis.logging.slf4j.Slf4jImpl
  mapper-locations:
    - classpath*:mappers/**/*.xml
  # NOTE(review): MyBatis-Plus documents this property as a single string of
  # comma/semicolon-separated packages - confirm the list form below binds as
  # intended before relying on the aliases.
  type-aliases-package:
    - com.datamate.collection.domain.model
    - com.datamate.datamanagement.domain.model.dataset

# Application (embedded server) configuration
server:
  port: ${SERVER_PORT:8080}
  servlet:
    context-path: /api
    encoding:
      charset: UTF-8
      enabled: true
      force: true

# Logging configuration
logging:
  config: file:/opt/backend/log4j2.xml

# Actuator configuration
management:
  endpoints:
    web:
      exposure:
        include: health,info,metrics,prometheus
  endpoint:
    health:
      show-details: when-authorized
  health:
    elasticsearch:
      enabled: false  # Disable the Elasticsearch health check

# Platform configuration
datamate:
  # JWT configuration
  # NOTE(review): the fallback secret is committed in plain text; presumably
  # for development only - confirm JWT_SECRET is set in production.
  jwt:
    secret: ${JWT_SECRET:dataMateSecretKey2024ForJWTTokenGeneration}
    expiration: ${JWT_EXPIRATION:86400}  # 24 hours, in seconds
    header: Authorization
    prefix: "Bearer "

  # File storage configuration
  # NOTE(review): the MinIO access/secret key fallbacks are plain-text
  # defaults - confirm they are overridden outside local development.
  storage:
    type: ${STORAGE_TYPE:local}  # local, minio, s3
    local:
      base-path: ${STORAGE_LOCAL_PATH:./data/storage}
    minio:
      endpoint: ${MINIO_ENDPOINT:http://localhost:9000}
      access-key: ${MINIO_ACCESS_KEY:minioadmin}
      secret-key: ${MINIO_SECRET_KEY:minioadmin}
      bucket-name: ${MINIO_BUCKET:data-mate}

  # Ray executor configuration
  ray:
    enabled: ${RAY_ENABLED:false}
    address: ${RAY_ADDRESS:ray://localhost:10001}
    runtime-env:
      working-dir: ${RAY_WORKING_DIR:./runtime/python-executor}
      pip-packages:
        - "ray[default]==2.7.0"
        - "pandas"
        - "numpy"
        - "data-juicer"

  # Data collection service configuration (may be overlaid by module imports)
  data-collection: {}

  # Operator market configuration
  operator-market:
    repository-path: ${OPERATOR_REPO_PATH:./runtime/operators}
    registry-url: ${OPERATOR_REGISTRY_URL:}
    max-upload-size: ${OPERATOR_MAX_UPLOAD_SIZE:50MB}

  # Data processing configuration
  data-processing:
    max-file-size: ${MAX_FILE_SIZE:1GB}
    temp-dir: ${TEMP_DIR:./data/temp}
    batch-size: ${BATCH_SIZE:1000}

  # Annotation configuration
  annotation:
    auto-annotation:
      enabled: ${AUTO_ANNOTATION_ENABLED:true}
      model-endpoint: ${ANNOTATION_MODEL_ENDPOINT:}
    quality-control:
      enabled: ${QC_ENABLED:true}
      threshold: ${QC_THRESHOLD:0.8}

  # RAG configuration
  rag:
    embedding:
      model: ${RAG_EMBEDDING_MODEL:text-embedding-ada-002}
      api-key: ${RAG_API_KEY:}
      dimension: ${RAG_DIMENSION:1536}
    chunk:
      size: ${RAG_CHUNK_SIZE:512}
      overlap: ${RAG_CHUNK_OVERLAP:50}
    retrieval:
      top-k: ${RAG_TOP_K:5}
      score-threshold: ${RAG_SCORE_THRESHOLD:0.7}