Merge branch 'ModelEngine-Group:main' into main

Authored by chenghh-9609 on 2025-10-23 09:08:20 +08:00, committed by GitHub.
83 changed files with 3321 additions and 761 deletions

View File

@@ -2,13 +2,13 @@ name: Backend Docker Image CI
on:
  push:
-    branches: [ "develop_930" ]
+    branches: [ "main" ]
    paths:
      - 'backend/**'
      - 'scripts/images/backend/**'
      - '.github/workflows/docker-image-backend.yml'
  pull_request:
-    branches: [ "develop_930" ]
+    branches: [ "main" ]
    paths:
      - 'backend/**'
      - 'scripts/images/backend/**'
@@ -16,12 +16,12 @@ on:
  workflow_dispatch:
jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v4
-    - name: Build the Backend Docker image
-      run: make build-backend
+  call-docker-build:
+    name: Build and Push Backend Docker Image
+    uses: ./.github/workflows/docker-images-reusable.yml
+    permissions:
+      contents: read
+      packages: write
+    with:
+      service_name: backend
+      build_dir: .

View File

@@ -2,13 +2,13 @@ name: Frontend Docker Image CI
on:
  push:
-    branches: [ "develop_930" ]
+    branches: [ "main" ]
    paths:
      - 'frontend/**'
      - 'scripts/images/frontend/**'
      - '.github/workflows/docker-image-frontend.yml'
  pull_request:
-    branches: [ "develop_930" ]
+    branches: [ "main" ]
    paths:
      - 'frontend/**'
      - 'scripts/images/frontend/**'
@@ -16,12 +16,12 @@ on:
  workflow_dispatch:
jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v4
-    - name: Build the Frontend Docker image
-      run: make build-frontend
+  call-docker-build:
+    name: Build and Push Frontend Docker Image
+    uses: ./.github/workflows/docker-images-reusable.yml
+    permissions:
+      contents: read
+      packages: write
+    with:
+      service_name: frontend
+      build_dir: .

View File

@@ -0,0 +1,27 @@
name: Runtime Docker Image CI
on:
  push:
    branches: [ "main" ]
    paths:
      - 'runtime/**'
      - 'scripts/images/runtime/**'
      - '.github/workflows/docker-image-runtime.yml'
  pull_request:
    branches: [ "main" ]
    paths:
      - 'runtime/**'
      - 'scripts/images/runtime/**'
      - '.github/workflows/docker-image-runtime.yml'
  workflow_dispatch:
jobs:
  call-docker-build:
    name: Build and Push Runtime Docker Image
    uses: ./.github/workflows/docker-images-reusable.yml
    permissions:
      contents: read
      packages: write
    with:
      service_name: runtime
      build_dir: .

View File

@@ -0,0 +1,57 @@
name: Docker Image Build & Push
on:
workflow_call:
inputs:
service_name:
required: true
type: string
build_dir:
required: true
type: string
jobs:
build-and-push:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@v4
- name: Login to GitHub Container Registry
if: github.event_name != 'pull_request'
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Set Docker Image Tag
id: set-tag
run: |
LOWERCASE_REPO=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]')
BASE_IMAGE=ghcr.io/$LOWERCASE_REPO/datamate-${{ inputs.service_name }}
if [[ $GITHUB_REF == refs/tags/v* ]]; then
TAG=${GITHUB_REF#refs/tags/v}
echo "TAGS=$BASE_IMAGE:$TAG" >> $GITHUB_OUTPUT
elif [[ $GITHUB_REF == refs/heads/main ]]; then
echo "TAGS=$BASE_IMAGE:latest" >> $GITHUB_OUTPUT
else
echo "TAGS=$BASE_IMAGE:temp" >> $GITHUB_OUTPUT
fi
- name: Build Docker Image
run: |
make build-${{ inputs.service_name }} VERSION=latest
- name: Tag Docker Image
run: |
docker tag datamate-${{ inputs.service_name }}:latest ${{ steps.set-tag.outputs.TAGS }}
- name: Push Docker Image
if: github.event_name != 'pull_request'
run: |
docker push ${{ steps.set-tag.outputs.TAGS }}
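
The tag-selection step above maps the triggering git ref to exactly one image tag: version tags push a versioned image, pushes to main refresh latest, and everything else (including pull requests, which are never pushed) gets a throwaway temp tag. A small Python sketch of the same rule, shown for illustration only (it is not part of the repository):

# Illustration only: the tag-selection rule from the "Set Docker Image Tag" step,
# which actually runs as a shell step in the workflow above.
def resolve_image_tag(github_ref: str, base_image: str) -> str:
    """Map a git ref to the image tag the workflow would push."""
    if github_ref.startswith("refs/tags/v"):
        # Version tags: refs/tags/v1.2.3 -> base:1.2.3
        return f"{base_image}:{github_ref.removeprefix('refs/tags/v')}"
    if github_ref == "refs/heads/main":
        # Pushes to main track the moving "latest" tag.
        return f"{base_image}:latest"
    # Anything else (PR builds, feature branches) gets a throwaway tag.
    return f"{base_image}:temp"

assert resolve_image_tag("refs/tags/v1.4.0",
                         "ghcr.io/modelengine-group/datamate-backend") \
    == "ghcr.io/modelengine-group/datamate-backend:1.4.0"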

View File

@@ -33,7 +33,7 @@ else
endif
.PHONY: install
-install: install-data-mate
+install: install-datamate
.PHONY: uninstall-%
uninstall-%:
@@ -54,84 +54,78 @@ else
endif
.PHONY: uninstall
-uninstall: uninstall-data-mate
+uninstall: uninstall-datamate
# build
-.PHONY: mineru-docker-build
-mineru-docker-build:
-    docker build -t mineru:$(VERSION) . -f scripts/images/mineru/Dockerfile
-.PHONY: datax-docker-build
-datax-docker-build:
-    docker build -t datax:$(VERSION) . -f scripts/images/datax/Dockerfile
-.PHONY: unstructured-docker-build
-unstructured-docker-build:
-    docker build -t unstructured:$(VERSION) . -f scripts/images/unstructured/Dockerfile
.PHONY: backend-docker-build
backend-docker-build:
-    docker build -t backend:$(VERSION) . -f scripts/images/backend/Dockerfile
+    docker build -t datamate-backend:$(VERSION) . -f scripts/images/backend/Dockerfile
.PHONY: frontend-docker-build
frontend-docker-build:
-    docker build -t frontend:$(VERSION) . -f scripts/images/frontend/Dockerfile
+    docker build -t datamate-frontend:$(VERSION) . -f scripts/images/frontend/Dockerfile
.PHONY: runtime-docker-build
runtime-docker-build:
-    docker build -t runtime:$(VERSION) . -f scripts/images/runtime/Dockerfile
+    docker build -t datamate-runtime:$(VERSION) . -f scripts/images/runtime/Dockerfile
+.PHONY: label-studio-adapter-docker-build
+label-studio-adapter-docker-build:
+    docker build -t label-studio-adapter:$(VERSION) . -f scripts/images/label-studio-adapter/Dockerfile
.PHONY: backend-docker-install
backend-docker-install:
-    cd deployment/docker/data-mate && docker-compose up -d backend
+    cd deployment/docker/datamate && docker-compose up -d backend
.PHONY: backend-docker-uninstall
backend-docker-uninstall:
-    cd deployment/docker/data-mate && docker-compose down backend
+    cd deployment/docker/datamate && docker-compose down backend
.PHONY: frontend-docker-install
frontend-docker-install:
-    cd deployment/docker/data-mate && docker-compose up -d frontend
+    cd deployment/docker/datamate && docker-compose up -d frontend
.PHONY: frontend-docker-uninstall
frontend-docker-uninstall:
-    cd deployment/docker/data-mate && docker-compose down frontend
+    cd deployment/docker/datamate && docker-compose down frontend
.PHONY: runtime-docker-install
runtime-docker-install:
-    cd deployment/docker/data-mate && docker-compose up -d runtime
+    cd deployment/docker/datamate && docker-compose up -d runtime
.PHONY: runtime-docker-uninstall
runtime-docker-uninstall:
-    cd deployment/docker/data-mate && docker-compose down runtime
+    cd deployment/docker/datamate && docker-compose down runtime
.PHONY: runtime-k8s-install
runtime-k8s-install: create-namespace
-    helm upgrade kuberay-operator deployment/helm/ray/kuberay-operator --install -n $(NAMESPACE)
-    helm upgrade raycluster deployment/helm/ray/ray-cluster/ --install -n $(NAMESPACE)
+    helm upgrade datamate-kuberay-operator deployment/helm/ray/kuberay-operator --install -n $(NAMESPACE)
+    helm upgrade datamate-raycluster deployment/helm/ray/ray-cluster/ --install -n $(NAMESPACE)
    kubectl apply -f deployment/helm/ray/service.yaml -n $(NAMESPACE)
.PHONY: runtime-k8s-uninstall
runtime-k8s-uninstall:
-    helm uninstall raycluster -n $(NAMESPACE)
-    helm uninstall kuberay-operator -n $(NAMESPACE)
+    helm uninstall datamate-raycluster -n $(NAMESPACE)
+    helm uninstall datamate-kuberay-operator -n $(NAMESPACE)
    kubectl delete -f deployment/helm/ray/service.yaml -n $(NAMESPACE)
-.PHONY: unstructured-k8s-install
-unstructured-k8s-install: create-namespace
-    kubectl apply -f deployment/kubernetes/unstructured/deploy.yaml -n $(NAMESPACE)
.PHONY: mysql-k8s-install
mysql-k8s-install: create-namespace
-    kubectl create configmap init-sql --from-file=scripts/db/ --dry-run=client -o yaml | kubectl apply -f - -n $(NAMESPACE)
+    kubectl create configmap datamate-init-sql --from-file=scripts/db/ --dry-run=client -o yaml | kubectl apply -f - -n $(NAMESPACE)
    kubectl apply -f deployment/kubernetes/mysql/configmap.yaml -n $(NAMESPACE)
    kubectl apply -f deployment/kubernetes/mysql/deploy.yaml -n $(NAMESPACE)
.PHONY: mysql-k8s-uninstall
mysql-k8s-uninstall:
-    kubectl delete configmap init-sql -n $(NAMESPACE)
-    kubectl delete -f deployment/kubernetes/mysql/configmap.yaml -n $(NAMESPACE)
-    kubectl delete -f deployment/kubernetes/mysql/deploy.yaml -n $(NAMESPACE)
+    kubectl delete configmap datamate-init-sql -n $(NAMESPACE) --ignore-not-found
+    kubectl delete -f deployment/kubernetes/mysql/configmap.yaml -n $(NAMESPACE) --ignore-not-found
+    kubectl delete -f deployment/kubernetes/mysql/deploy.yaml -n $(NAMESPACE) --ignore-not-found
+.PHONY: database-k8s-install
+database-k8s-install: mysql-k8s-install
+.PHONY: database-k8s-uninstall
+database-k8s-uninstall: mysql-k8s-uninstall
.PHONY: backend-k8s-install
backend-k8s-install: create-namespace
@@ -139,7 +133,7 @@ backend-k8s-install: create-namespace
.PHONY: backend-k8s-uninstall
backend-k8s-uninstall:
-    kubectl delete -f deployment/kubernetes/backend/deploy.yaml -n $(NAMESPACE)
+    kubectl delete -f deployment/kubernetes/backend/deploy.yaml -n $(NAMESPACE) --ignore-not-found
.PHONY: frontend-k8s-install
frontend-k8s-install: create-namespace
@@ -147,18 +141,18 @@ frontend-k8s-install: create-namespace
.PHONY: frontend-k8s-uninstall
frontend-k8s-uninstall:
-    kubectl delete -f deployment/kubernetes/frontend/deploy.yaml -n $(NAMESPACE)
+    kubectl delete -f deployment/kubernetes/frontend/deploy.yaml -n $(NAMESPACE) --ignore-not-found
-.PHONY: data-mate-docker-install
-data-mate-docker-install:
+.PHONY: datamate-docker-install
+datamate-docker-install:
    cd deployment/docker/datamate && docker-compose up -d
-.PHONY: data-mate-docker-uninstall
-data-mate-docker-uninstall:
+.PHONY: datamate-docker-uninstall
+datamate-docker-uninstall:
    cd deployment/docker/datamate && docker-compose down
-.PHONY: data-mate-k8s-install
-data-mate-k8s-install: create-namespace mysql-k8s-install backend-k8s-install frontend-k8s-install runtime-k8s-install
+.PHONY: datamate-k8s-install
+datamate-k8s-install: create-namespace database-k8s-install backend-k8s-install frontend-k8s-install runtime-k8s-install
-.PHONY: data-mate-k8s-uninstall
-data-mate-k8s-uninstall: mysql-k8s-uninstall backend-k8s-uninstall frontend-k8s-uninstall runtime-k8s-uninstall
+.PHONY: datamate-k8s-uninstall
+datamate-k8s-uninstall: database-k8s-uninstall backend-k8s-uninstall frontend-k8s-uninstall runtime-k8s-uninstall

View File

@@ -9,8 +9,7 @@
![GitHub Issues](https://img.shields.io/github/issues/ModelEngine-Group/DataMate)
![GitHub License](https://img.shields.io/github/license/ModelEngine-Group/DataMate)

-**DataMate是面向模型微调与RAG检索的企业级数据处理平台,支持数据归集、数据管理、算子市场、数据清洗、数据合成、数据标注、数据评估、知识生成等核心功能。
-**
+**DataMate是面向模型微调与RAG检索的企业级数据处理平台,支持数据归集、数据管理、算子市场、数据清洗、数据合成、数据标注、数据评估、知识生成等核心功能。**

[简体中文](./README-zh.md) | [English](./README.md)

View File

@@ -14,7 +14,7 @@ import java.time.Duration;
@Slf4j
public class RuntimeClient {
-    private static final String BASE_URL = "http://runtime:8081/api";
+    private static final String BASE_URL = "http://datamate-runtime:8081/api";
    private static final String CREATE_TASK_URL = BASE_URL + "/task/{0}/submit";

View File

@@ -12,6 +12,7 @@ import com.datamate.cleaning.domain.model.TaskProcess;
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningResultMapper;
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTaskMapper;
import com.datamate.cleaning.infrastructure.persistence.mapper.OperatorInstanceMapper;
+import com.datamate.cleaning.interfaces.dto.CleaningProcess;
import com.datamate.cleaning.interfaces.dto.CleaningTask;
import com.datamate.cleaning.interfaces.dto.CreateCleaningTaskRequest;
import com.datamate.cleaning.interfaces.dto.OperatorInstance;
@@ -55,7 +56,14 @@ public class CleaningTaskService {
    public List<CleaningTask> getTasks(String status, String keywords, Integer page, Integer size) {
        Integer offset = page * size;
-        return cleaningTaskMapper.findTasks(status, keywords, size, offset);
+        List<CleaningTask> tasks = cleaningTaskMapper.findTasks(status, keywords, size, offset);
+        tasks.forEach(this::setProcess);
+        return tasks;
    }

+    private void setProcess(CleaningTask task) {
+        int count = cleaningResultMapper.countByInstanceId(task.getId());
+        task.setProgress(CleaningProcess.of(task.getFileCount(), count));
+    }

    public int countTasks(String status, String keywords) {
@@ -80,6 +88,7 @@ public class CleaningTaskService {
        task.setDestDatasetId(destDataset.getId());
        task.setDestDatasetName(destDataset.getName());
        task.setBeforeSize(srcDataset.getTotalSize());
+        task.setFileCount(srcDataset.getFileCount());
        cleaningTaskMapper.insertTask(task);

        List<OperatorInstancePo> instancePos = request.getInstance().stream()
@@ -93,7 +102,9 @@ public class CleaningTaskService {
    }

    public CleaningTask getTask(String taskId) {
-        return cleaningTaskMapper.findTaskById(taskId);
+        CleaningTask task = cleaningTaskMapper.findTaskById(taskId);
+        setProcess(task);
+        return task;
    }

    @Transactional
@@ -113,7 +124,7 @@ public class CleaningTaskService {
        process.setDatasetId(task.getDestDatasetId());
        process.setDatasetPath(FLOW_PATH + "/" + task.getId() + "/dataset.jsonl");
        process.setExportPath(DATASET_PATH + "/" + task.getDestDatasetId());
-        process.setExecutorType(ExecutorType.DATA_PLATFORM.getValue());
+        process.setExecutorType(ExecutorType.DATAMATE.getValue());
        process.setProcess(instances.stream()
            .map(instance -> Map.of(instance.getId(), instance.getOverrides()))
            .toList());

View File

@@ -4,7 +4,7 @@ import lombok.Getter;
@Getter
public enum ExecutorType {
-    DATA_PLATFORM("data_platform"),
+    DATAMATE("datamate"),
    DATA_JUICER_RAY("ray"),
    DATA_JUICER_DEFAULT("default");

View File

@@ -6,4 +6,6 @@ import org.apache.ibatis.annotations.Param;
@Mapper
public interface CleaningResultMapper {
    void deleteByInstanceId(@Param("instanceId") String instanceId);
+
+    int countByInstanceId(@Param("instanceId") String instanceId);
}

View File

@@ -4,6 +4,9 @@ package com.datamate.cleaning.interfaces.dto;
import lombok.Getter;
import lombok.Setter;

+import java.math.BigDecimal;
+import java.math.RoundingMode;

/**
 * CleaningProcess
 */
@@ -16,5 +19,20 @@
    private Integer totalFileNum;

    private Integer finishedFileNum;

+    public CleaningProcess(int totalFileNum, int finishedFileNum) {
+        this.totalFileNum = totalFileNum;
+        this.finishedFileNum = finishedFileNum;
+        if (totalFileNum == 0) {
+            this.process = 0.0f;
+        } else {
+            this.process = BigDecimal.valueOf(finishedFileNum * 100L)
+                .divide(BigDecimal.valueOf(totalFileNum), 2, RoundingMode.HALF_UP).floatValue();
+        }
+    }
+
+    public static CleaningProcess of(int totalFileNum, int finishedFileNum) {
+        return new CleaningProcess(totalFileNum, finishedFileNum);
+    }
}
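
The constructor added above reports progress as the percentage of finished files, rounded half-up to two decimal places and guarded against a zero file count. The same calculation as a short Python sketch (illustration only, not project code):

# Sketch of CleaningProcess's progress computation, using decimal to mirror
# BigDecimal's HALF_UP rounding.
from decimal import Decimal, ROUND_HALF_UP

def progress_percent(total_files: int, finished_files: int) -> float:
    if total_files == 0:
        return 0.0
    pct = (Decimal(finished_files * 100) / Decimal(total_files)).quantize(
        Decimal("0.01"), rounding=ROUND_HALF_UP)
    return float(pct)

assert progress_percent(0, 0) == 0.0
assert progress_percent(3, 1) == 33.33   # 1 of 3 files finished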

View File

@@ -36,6 +36,8 @@ public class CleaningTask {
    private long afterSize;

+    private int fileCount;
+
    /**
     * Current task status
     */

View File

@@ -5,4 +5,8 @@
        DELETE FROM t_clean_result WHERE instance_id = #{instanceId}
    </delete>
+
+    <select id="countByInstanceId" resultType="java.lang.Integer">
+        SELECT COUNT(1) FROM t_clean_result WHERE instance_id = #{instanceId}
+    </select>
</mapper>

View File

@@ -3,7 +3,7 @@
<mapper namespace="com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTaskMapper">
    <sql id="Base_Column_List">
        id, name, description, src_dataset_id, src_dataset_name, dest_dataset_id, dest_dataset_name, before_size,
-        after_size, status, created_at, started_at, finished_at
+        after_size, file_count, status, created_at, started_at, finished_at
    </sql>

    <select id="findTasks" resultType="com.datamate.cleaning.interfaces.dto.CleaningTask">
@@ -28,9 +28,9 @@
    <insert id="insertTask">
        INSERT INTO t_clean_task (id, name, description, status, src_dataset_id, src_dataset_name, dest_dataset_id,
-                                  dest_dataset_name, before_size, after_size, created_at)
+                                  dest_dataset_name, before_size, after_size, file_count, created_at)
        VALUES (#{id}, #{name}, #{description}, #{status}, #{srcDatasetId}, #{srcDatasetName}, #{destDatasetId},
-                #{destDatasetName}, ${beforeSize}, ${afterSize}, NOW())
+                #{destDatasetName}, #{beforeSize}, #{afterSize}, #{fileCount}, NOW())
    </insert>

    <update id="updateTask">

View File

@@ -12,7 +12,7 @@ spring:
  # Datasource configuration
  datasource:
    driver-class-name: com.mysql.cj.jdbc.Driver
-    url: jdbc:mysql://mysql:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true
+    url: jdbc:mysql://datamate-database:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true
    username: ${DB_USERNAME:root}
    password: ${DB_PASSWORD:Huawei@123}
    hikari:

View File

@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="WARN" monitorInterval="30">
    <Properties>
-        <Property name="LOG_PATH">/var/log/data-mate/backend</Property>
+        <Property name="LOG_PATH">/var/log/datamate/backend</Property>
        <Property name="LOG_PATTERN">%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n</Property>
        <Property name="MAX_FILE_SIZE">100MB</Property>
        <Property name="MAX_HISTORY">30</Property>

View File

@@ -1,63 +1,70 @@
services:
  # 1) backend
-  backend:
-    container_name: backend
-    image: backend
+  datamate-backend:
+    container_name: datamate-backend
+    image: datamate-backend
    restart: on-failure
    privileged: true
    ports:
-      - "8080:8080"
+      - "8080"
    volumes:
      - dataset_volume:/dataset
      - flow_volume:/flow
-      - log_volume:/var/log/data-mate
-    networks: [ edatamate ]
+      - log_volume:/var/log/datamate
+    networks: [ datamate ]
    depends_on:
-      - mysql
+      - datamate-database

  # 2) frontend(NodePort 30000)
-  frontend:
-    container_name: frontend
-    image: frontend
+  datamate-frontend:
+    container_name: datamate-frontend
+    image: datamate-frontend
    restart: on-failure
    ports:
      - "30000:80" # nodePort → hostPort
    volumes:
-      - log_volume:/var/log/data-mate
-    networks: [ edatamate ]
+      - frontend_log_volume:/var/log/datamate/frontend
+    networks: [ datamate ]
    depends_on:
-      - backend
+      - datamate-backend

-  # 3) mysql
-  mysql:
-    container_name: mysql
+  # 3) database
+  datamate-database:
+    container_name: datamate-database
    image: mysql:8
    restart: on-failure
    environment:
      MYSQL_ROOT_PASSWORD: Huawei@123
    ports:
-      - "3306:3306"
+      - "3306"
+    command: |
+      sh -c "
+      chown mysql:mysql /var/log/datamate/database &&
+      chmod 755 /var/log/datamate/database &&
+      exec docker-entrypoint.sh mysqld
+      "
    volumes:
      - mysql_volume:/var/lib/mysql
      - ../../../scripts/db:/docker-entrypoint-initdb.d
-      - ./utf8.cnf:/etc/mysql/conf.d/utf8.cnf
-      - log_volume:/var/log/data-mate
-    networks: [ edatamate ]
+      - ./utf8.cnf:/etc/mysql/conf.d/utf8.cnf:ro
+      - database_log_volume:/var/log/datamate/database
+    networks: [ datamate ]

-  runtime:
-    container_name: runtime
-    image: runtime
+  # 3) runtime
+  datamate-runtime:
+    container_name: datamate-runtime
+    image: datamate-runtime
    restart: on-failure
    environment:
      RAY_DEDUP_LOGS: "0"
      RAY_TQDM_PATCH_PRINT: "0"
-      MYSQL_HOST: "mysql"
+      MYSQL_HOST: "datamate-database"
      MYSQL_PORT: "3306"
      MYSQL_USER: "root"
      MYSQL_PASSWORD: "Huawei@123"
      MYSQL_DATABASE: "datamate"
    ports:
-      - "8081:8081"
+      - "8081"
    command:
      - python
      - /opt/runtime/datamate/operator_runtime.py
@@ -65,22 +72,27 @@ services:
      - "8081"
    volumes:
      - ray_log_volume:/tmp/ray
-      - log_volume:/var/log/data-mate
+      - log_volume:/var/log/datamate
      - dataset_volume:/dataset
      - flow_volume:/flow
+    networks: [ datamate ]

volumes:
  dataset_volume:
-    name: data-mate-dataset-volume
+    name: datamate-dataset-volume
  flow_volume:
-    name: data-mate-flow-volume
+    name: datamate-flow-volume
  log_volume:
-    name: data-mate-log-volume
+    name: datamate-log-volume
  mysql_volume:
-    name: data-mate-mysql-volume
+    name: datamate-mysql-volume
  ray_log_volume:
-    name: data-mate-ray-log-volume
+    name: datamate-ray-log-volume
+  frontend_log_volume:
+    name: datamate-frontend-log-volume
+  database_log_volume:
+    name: datamate-database-log-volume

networks:
-  edatamate:
+  datamate:
    driver: bridge

View File

@@ -5,6 +5,7 @@ character-set-server = utf8mb4
collation-server = utf8mb4_unicode_ci
# Or use utf8_general_ci (slightly better performance, but looser collation rules)
default-time-zone = 'Asia/Shanghai'
+log_error=/var/log/datamate/database/error.log

[client]
# Default character set for client connections

View File

@@ -3,13 +3,13 @@
# Declare variables to be passed into your templates.

# -- String to partially override release name.
-nameOverride: kuberay-operator
+nameOverride: datamate-kuberay-operator
# -- String to fully override release name.
-fullnameOverride: kuberay-operator
+fullnameOverride: datamate-kuberay-operator
# -- String to override component name.
-componentOverride: kuberay-operator
+componentOverride: datamate-kuberay-operator

image:
  # -- Image repository.
@@ -32,7 +32,7 @@ serviceAccount:
  create: true
  # -- The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template.
-  name: kuberay-operator
+  name: datamate-kuberay-operator

logging:
  # -- Log encoder to use for stdout (one of `json` or `console`).
@@ -88,7+88,7 @@ featureGates:
# Configurations for KubeRay operator metrics.
metrics:
  # -- Whether KubeRay operator should emit control plane metrics.
-  enabled: true
+  enabled: false
  serviceMonitor:
    # -- Enable a prometheus ServiceMonitor
    enabled: false

View File

@@ -6,12 +6,12 @@
# in this Helm chart.
image:
-  repository: runtime
+  repository: datamate-runtime
  tag: latest
  pullPolicy: IfNotPresent

nameOverride: "kuberay"
-fullnameOverride: ""
+fullnameOverride: "datamate-raycluster"

imagePullSecrets: []
# - name: an-existing-secret
@@ -68,7 +68,7 @@ head:
    - name: RAY_TQDM_PATCH_PRINT
      value: "0"
    - name: MYSQL_HOST
-      value: "mysql"
+      value: "datamate-database"
    - name: MYSQL_PORT
      value: "3306"
    - name: MYSQL_USER
@@ -114,15 +114,15 @@ head:
  volumes:
    - name: log-volume
      hostPath:
-        path: /opt/data-mate/data/log
+        path: /opt/datamate/data/log
        type: DirectoryOrCreate
    - name: dataset-volume
      hostPath:
-        path: /opt/data-mate/data/dataset
+        path: /opt/datamate/data/dataset
        type: DirectoryOrCreate
    - name: flow-volume
      hostPath:
-        path: /opt/data-mate/data/flow
+        path: /opt/datamate/data/flow
        type: DirectoryOrCreate
  volumeMounts:
    - mountPath: /tmp/ray
@@ -136,7 +136,7 @@ head:
  # Follows standard K8s container spec.
  sidecarContainers:
    - name: runtime
-      image: runtime
+      image: datamate-runtime
      imagePullPolicy: IfNotPresent
      command:
        - python
@@ -145,7 +145,7 @@ head:
        - "8081"
      env:
        - name: MYSQL_HOST
-          value: "mysql"
+          value: "datamate-database"
        - name: MYSQL_PORT
          value: "3306"
        - name: MYSQL_USER
@@ -160,7 +160,7 @@ head:
        - mountPath: /tmp/ray
          name: log-volume
          subPath: ray/head
-        - mountPath: /var/log/data-mate
+        - mountPath: /var/log/datamate
          name: log-volume
        - mountPath: /dataset
          name: dataset-volume
@@ -212,7 +212,7 @@ worker:
    - name: RAY_TQDM_PATCH_PRINT
      value: "0"
    - name: MYSQL_HOST
-      value: "mysql"
+      value: "datamate-database"
    - name: MYSQL_PORT
      value: "3306"
    - name: MYSQL_USER
@@ -257,15 +257,15 @@ worker:
  volumes:
    - name: log-volume
      hostPath:
-        path: /opt/data-mate/data/log
+        path: /opt/datamate/data/log
        type: DirectoryOrCreate
    - name: dataset-volume
      hostPath:
-        path: /opt/data-mate/data/dataset
+        path: /opt/datamate/data/dataset
        type: DirectoryOrCreate
    - name: flow-volume
      hostPath:
-        path: /opt/data-mate/data/flow
+        path: /opt/datamate/data/flow
        type: DirectoryOrCreate
  volumeMounts:
    - mountPath: /tmp/ray
@@ -350,15 +350,15 @@ additionalWorkerGroups:
  volumes:
    - name: log-volume
      hostPath:
-        path: /opt/data-mate/data/log
+        path: /opt/datamate/data/log
        type: DirectoryOrCreate
    - name: dataset-volume
      hostPath:
-        path: /opt/data-mate/data/dataset
+        path: /opt/datamate/data/dataset
        type: DirectoryOrCreate
    - name: flow-volume
      hostPath:
-        path: /opt/data-mate/data/flow
+        path: /opt/datamate/data/flow
        type: DirectoryOrCreate
  volumeMounts:
    - mountPath: /tmp/ray

View File

@@ -1,7 +1,7 @@
apiVersion: v1
kind: Service
metadata:
-  name: runtime
+  name: datamate-runtime
  labels:
    ray.io/node-type: head
spec:

View File

@@ -2,8 +2,9 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  labels:
-    app: backend
-  name: backend
+    app: datamate
+    tier: backend
+  name: datamate-backend
rules:
  - verbs:
      - create
@@ -33,83 +34,89 @@ apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
-    app: backend
-  name: backend
+    app: datamate
+    tier: backend
+  name: datamate-backend
---
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  labels:
-    app: backend
-  name: backend
+    app: datamate
+    tier: backend
+  name: datamate-backend
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
-  name: backend
+  name: datamate-backend
subjects:
  - kind: ServiceAccount
-    name: backend
+    name: datamate-backend
---
apiVersion: apps/v1
kind: Deployment
metadata:
-  name: backend
+  name: datamate-backend
  labels:
-    app: backend
+    app: datamate
+    tier: backend
spec:
  replicas: 1
  selector:
    matchLabels:
-      app: backend
+      app: datamate
+      tier: backend
  template:
    metadata:
      labels:
-        app: backend
+        app: datamate
+        tier: backend
    spec:
-      serviceAccountName: backend
+      serviceAccountName: datamate-backend
      containers:
        - name: backend
-          image: backend
+          image: datamate-backend
          imagePullPolicy: IfNotPresent
          env:
            - name: namespace
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: SPRING_CONFIG_LOCATION
              value: file:/opt/backend/application.yml
          ports:
            - containerPort: 8080
          volumeMounts:
            - name: dataset-volume
              mountPath: /dataset
            - name: flow-volume
              mountPath: /flow
            - name: log-volume
-              mountPath: /var/log/data-mate
+              mountPath: /var/log/datamate
      volumes:
        - name: dataset-volume
          hostPath:
-            path: /opt/data-mate/data/dataset
+            path: /opt/datamate/data/dataset
            type: DirectoryOrCreate
        - name: flow-volume
          hostPath:
-            path: /opt/data-mate/data/flow
+            path: /opt/datamate/data/flow
            type: DirectoryOrCreate
        - name: log-volume
          hostPath:
-            path: /opt/data-mate/data/log
+            path: /opt/datamate/data/log
            type: DirectoryOrCreate
---
apiVersion: v1
kind: Service
metadata:
-  name: backend
+  name: datamate-backend
  labels:
-    app: backend
+    app: datamate
+    tier: backend
spec:
  type: ClusterIP
  ports:
@@ -117,4 +124,5 @@ spec:
      targetPort: 8080
      protocol: TCP
  selector:
-    app: backend
+    app: datamate
+    tier: backend

View File

@@ -1,54 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: datax
labels:
app: datax
spec:
replicas: 1
selector:
matchLabels:
app: datax
template:
metadata:
labels:
app: datax
spec:
containers:
- name: datax
image: datax
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add:
- SYS_ADMIN
command:
- bash
- -c
- rpcbind && python3 /opt/datax/bin/app.py
ports:
- containerPort: 8000
volumeMounts:
- name: dataset
mountPath: /dataset
subPath: dataset
volumes:
- name: dataset
hostPath:
path: /tmp/data-mate
---
apiVersion: v1
kind: Service
metadata:
name: datax
labels:
app: datax
spec:
type: ClusterIP
ports:
- port: 8000
targetPort: 8000
protocol: TCP
selector:
app: datax

View File

@@ -1,32 +1,45 @@
apiVersion: apps/v1
kind: Deployment
metadata:
-  name: frontend
+  name: datamate-frontend
  labels:
-    app: frontend
+    app: datamate
+    tier: frontend
spec:
  replicas: 1
  selector:
    matchLabels:
-      app: frontend
+      app: datamate
+      tier: frontend
  template:
    metadata:
      labels:
-        app: frontend
+        app: datamate
+        tier: frontend
    spec:
      containers:
        - name: frontend
-          image: frontend
+          image: datamate-frontend
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 80
+          volumeMounts:
+            - name: log-volume
+              mountPath: /var/log/datamate/frontend
+              subPath: frontend
+      volumes:
+        - name: log-volume
+          hostPath:
+            path: /opt/datamate/data/log
+            type: DirectoryOrCreate
---
apiVersion: v1
kind: Service
metadata:
-  name: frontend
+  name: datamate-frontend
  labels:
-    app: frontend
+    app: datamate
+    tier: frontend
spec:
  type: NodePort
  ports:
@@ -35,4 +48,5 @@ spec:
      nodePort: 30000
      protocol: TCP
  selector:
-    app: frontend
+    app: datamate
+    tier: frontend

View File

@@ -1,52 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: mineru
labels:
app: mineru
spec:
replicas: 1
selector:
matchLabels:
app: mineru
template:
metadata:
labels:
app: mineru
spec:
containers:
- name: mineru
image: mineru
imagePullPolicy: IfNotPresent
command:
- mineru-api
args:
- --host
- "0.0.0.0"
- --port
- "8000"
ports:
- containerPort: 8000
volumeMounts:
- name: tmp
mountPath: /tmp/data-mate
volumes:
- name: tmp
hostPath:
path: /tmp/data-mate
---
apiVersion: v1
kind: Service
metadata:
name: mineru
labels:
app: mineru
spec:
type: ClusterIP
ports:
- port: 8000
targetPort: 8000
protocol: TCP
selector:
app: mineru

View File

@@ -1,7 +1,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
-  name: mysql-utf8-config
+  name: datamate-mysql-utf8-config
data:
  utf8.cnf: |
    [mysqld]
@@ -11,6 +11,7 @@ data:
    collation-server = utf8mb4_unicode_ci
    # Or use utf8_general_ci (slightly better performance, but looser collation rules)
    default-time-zone = 'Asia/Shanghai'
+    log_error=/var/log/datamate/database/error.log

    [client]
    # Default character set for client connections

View File

@@ -1,21 +1,39 @@
apiVersion: apps/v1
kind: Deployment
metadata:
-  name: mysql
+  name: datamate-database
  labels:
-    app: mysql
+    app: datamate
+    tier: database
spec:
  replicas: 1
  selector:
    matchLabels:
-      app: mysql
+      app: datamate
+      tier: database
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
-        app: mysql
+        app: datamate
+        tier: database
    spec:
+      initContainers:
+        - name: init-log
+          image: mysql:8
+          imagePullPolicy: IfNotPresent
+          command:
+            - sh
+            - -c
+          args:
+            - |
+              chown mysql:mysql /var/log/datamate/database
+              chmod 755 /var/log/datamate/database
+          volumeMounts:
+            - name: log-volume
+              mountPath: /var/log/datamate/database
+              subPath: database
      containers:
        - name: mysql
          image: mysql:8
@@ -28,6 +46,9 @@ spec:
          volumeMounts:
            - name: data-volume
              mountPath: /var/lib/mysql
+            - name: log-volume
+              mountPath: /var/log/datamate/database
+              subPath: database
            - name: init-sql
              mountPath: /docker-entrypoint-initdb.d
            - name: mysql-utf8-config
@@ -35,22 +56,27 @@ spec:
      volumes:
        - name: data-volume
          hostPath:
-            path: /opt/data-mate/data/mysql
+            path: /opt/datamate/data/mysql
            type: DirectoryOrCreate
+        - name: log-volume
+          hostPath:
+            path: /opt/datamate/data/log
+            type: DirectoryOrCreate
        - name: init-sql
          configMap:
-            name: init-sql
+            name: datamate-init-sql
        - name: mysql-utf8-config
          configMap:
-            name: mysql-utf8-config
+            name: datamate-mysql-utf8-config
---
apiVersion: v1
kind: Service
metadata:
-  name: mysql
+  name: datamate-database
  labels:
-    app: mysql
+    app: datamate
+    tier: database
spec:
  type: ClusterIP
  ports:
@@ -58,4 +84,5 @@ spec:
      targetPort: 3306
      protocol: TCP
  selector:
-    app: mysql
+    app: datamate
+    tier: database

View File

@@ -1,49 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: unstructured
labels:
app: unstructured
spec:
replicas: 1
selector:
matchLabels:
app: unstructured
template:
metadata:
labels:
app: unstructured
spec:
containers:
- name: unstructured
image: unstructured
imagePullPolicy: IfNotPresent
command:
- python
args:
- app.py
ports:
- containerPort: 8000
volumeMounts:
- name: tmp
mountPath: /tmp/data-mate
volumes:
- name: tmp
hostPath:
path: /tmp/data-mate
---
apiVersion: v1
kind: Service
metadata:
name: unstructured
labels:
app: unstructured
spec:
type: ClusterIP
ports:
- port: 8000
targetPort: 8000
protocol: TCP
selector:
app: unstructured

View File

@@ -12,7 +12,7 @@ spring:
  # Datasource configuration
  datasource:
    driver-class-name: com.mysql.cj.jdbc.Driver
-    url: jdbc:mysql://mysql:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true
+    url: jdbc:mysql://datamate-database:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true
    username: ${DB_USERNAME:root}
    password: ${DB_PASSWORD:Huawei@123}
    hikari:

View File

@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="WARN" monitorInterval="30">
    <Properties>
-        <Property name="LOG_PATH">/var/log/data-mate/backend</Property>
+        <Property name="LOG_PATH">/var/log/datamate/backend</Property>
        <Property name="LOG_PATTERN">%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n</Property>
        <Property name="MAX_FILE_SIZE">100MB</Property>
        <Property name="MAX_HISTORY">30</Property>

View File

@@ -0,0 +1,128 @@
# ====================================
# Label Studio Adapter Configuration
# ====================================
# =========================
# Application
# =========================
APP_NAME="Label Studio Adapter"
APP_VERSION="1.0.0"
APP_DESCRIPTION="Adapter for integrating Data Management System with Label Studio"
DEBUG=true

# =========================
# Server
# =========================
HOST=0.0.0.0
PORT=18000

# =========================
# Logging
# =========================
LOG_LEVEL=INFO

# =========================
# Label Studio service
# =========================
# Label Studio service address (adjust to the deployment mode)
# Docker environment: http://label-studio:8080
# Local development: http://127.0.0.1:8000
LABEL_STUDIO_BASE_URL=http://label-studio:8080

# Label Studio username and password (used to auto-create the user)
LABEL_STUDIO_USERNAME=admin@example.com
LABEL_STUDIO_PASSWORD=password

# Label Studio API token (legacy token, recommended)
# Obtain it from Account & Settings > Access Token in the Label Studio UI
LABEL_STUDIO_USER_TOKEN=your-label-studio-token-here

# Label Studio local file storage base path (path inside the container, used for permission checks in Docker deployments)
LABEL_STUDIO_LOCAL_BASE=/label-studio/local_files

# Label Studio local file serving path prefix (file path prefix used in task data)
LABEL_STUDIO_FILE_PATH_PREFIX=/data/local-files/?d=

# Local storage paths inside the Label Studio container (used to configure Local Storage)
LABEL_STUDIO_LOCAL_STORAGE_DATASET_BASE_PATH=/label-studio/local_files/dataset
LABEL_STUDIO_LOCAL_STORAGE_UPLOAD_BASE_PATH=/label-studio/local_files/upload

# Page size for the Label Studio task list
LS_TASK_PAGE_SIZE=1000

# =========================
# Data Management service
# =========================
# DM service address
DM_SERVICE_BASE_URL=http://data-engine:8080
# DM storage folder prefix (usually matches the local-files folder mapping in Label Studio)
DM_FILE_PATH_PREFIX=/

# =========================
# Adapter database (MySQL)
# =========================
# Priority 1: if MySQL is configured, it is used first
MYSQL_HOST=adapter-db
MYSQL_PORT=3306
MYSQL_USER=label_studio_user
MYSQL_PASSWORD=user_password
MYSQL_DATABASE=label_studio_adapter

# =========================
# Label Studio database (PostgreSQL)
# =========================
# Only needed when starting Label Studio via docker-compose.label-studio.yml
POSTGRES_HOST=label-studio-db
POSTGRES_PORT=5432
POSTGRES_USER=labelstudio
POSTGRES_PASSWORD=labelstudio@4321
POSTGRES_DATABASE=labelstudio

# =========================
# SQLite database (fallback)
# =========================
# Priority 3: if neither MySQL nor PostgreSQL is configured, SQLite is used
SQLITE_PATH=./data/labelstudio_adapter.db

# =========================
# Optional: specify the database URL directly
# =========================
# If set, this overrides the MySQL/PostgreSQL/SQLite settings above
# DATABASE_URL=postgresql+asyncpg://user:password@host:port/database

# =========================
# Security
# =========================
# Secret key (be sure to change this in production)
SECRET_KEY=your-secret-key-change-this-in-production
# Token expiry (minutes)
ACCESS_TOKEN_EXPIRE_MINUTES=30

# =========================
# CORS
# =========================
# Allowed origins (use concrete domains in production)
ALLOWED_ORIGINS=["*"]
# Allowed HTTP methods
ALLOWED_METHODS=["*"]
# Allowed request headers
ALLOWED_HEADERS=["*"]

# =========================
# Docker Compose
# =========================
# Docker Compose project name prefix
COMPOSE_PROJECT_NAME=ls-adapter

# =========================
# Sync settings (future extension)
# =========================
# Batch size for bulk sync tasks
SYNC_BATCH_SIZE=100
# Maximum number of retries when a sync fails
MAX_RETRIES=3
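
The adapter reads this file through its settings module (app.core.config), which is not part of this diff. A minimal, hypothetical sketch of how a few of these variables could be loaded with pydantic-settings; the field names and defaults below are assumptions rather than the project's actual config.py:

# Hypothetical sketch only -- the adapter's real app/core/config.py is not shown in this diff.
from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    app_name: str = "Label Studio Adapter"
    host: str = "0.0.0.0"
    port: int = 18000
    label_studio_base_url: str = "http://label-studio:8080"
    mysql_host: str | None = None        # when set, MySQL takes priority
    mysql_port: int = 3306
    mysql_user: str | None = None
    mysql_password: str | None = None
    mysql_database: str | None = None

settings = Settings()  # values from the environment / .env override the defaults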

View File

@@ -0,0 +1,6 @@
# Local Development Environment Files
.env
.dev.env
# logs
logs/

View File

@@ -0,0 +1,148 @@
# A generic, single database configuration.
[alembic]
# path to migration scripts.
# this is typically a path given in POSIX (e.g. forward slashes)
# format, relative to the token %(here)s which refers to the location of this
# ini file
script_location = %(here)s/alembic
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
# for all available tokens
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory. for multiple paths, the path separator
# is defined by "path_separator" below.
prepend_sys_path = .
# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library.
# Any required deps can installed by adding `alembic[tz]` to the pip requirements
# string value is passed to ZoneInfo()
# leave blank for localtime
# timezone =
# max length of characters to apply to the "slug" field
# truncate_slug_length = 40
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false
# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false
# version location specification; This defaults
# to <script_location>/versions. When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "path_separator"
# below.
# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
# path_separator; This indicates what character is used to split lists of file
# paths, including version_locations and prepend_sys_path within configparser
# files such as alembic.ini.
# The default rendered in new alembic.ini files is "os", which uses os.pathsep
# to provide os-dependent path splitting.
#
# Note that in order to support legacy alembic.ini files, this default does NOT
# take place if path_separator is not present in alembic.ini. If this
# option is omitted entirely, fallback logic is as follows:
#
# 1. Parsing of the version_locations option falls back to using the legacy
# "version_path_separator" key, which if absent then falls back to the legacy
# behavior of splitting on spaces and/or commas.
# 2. Parsing of the prepend_sys_path option falls back to the legacy
# behavior of splitting on spaces, commas, or colons.
#
# Valid values for path_separator are:
#
# path_separator = :
# path_separator = ;
# path_separator = space
# path_separator = newline
#
# Use os.pathsep. Default configuration used for new projects.
path_separator = os
# set to 'true' to search source files recursively
# in each "version_locations" directory
# new in Alembic version 1.10
# recursive_version_locations = false
# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8
# database URL. This is consumed by the user-maintained env.py script only.
# other means of configuring database URLs may be customized within the env.py
# file.
# sqlalchemy.url = driver://user:pass@localhost/dbname
# 注释掉默认 URL,我们将在 env.py 中从应用配置读取
[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples
# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME
# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module
# hooks = ruff
# ruff.type = module
# ruff.module = ruff
# ruff.options = check --fix REVISION_SCRIPT_FILENAME
# Alternatively, use the exec runner to execute a binary found on your PATH
# hooks = ruff
# ruff.type = exec
# ruff.executable = ruff
# ruff.options = check --fix REVISION_SCRIPT_FILENAME
# Logging configuration. This is also consumed by the user-maintained
# env.py script only.
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARNING
handlers = console
qualname =
[logger_sqlalchemy]
level = WARNING
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

View File

@@ -0,0 +1 @@
Generic single-database configuration.

View File

@@ -0,0 +1,145 @@
from logging.config import fileConfig
from sqlalchemy import engine_from_config
from sqlalchemy import pool
from sqlalchemy import create_engine, text
from alembic import context
import os
from urllib.parse import quote_plus
# Import the application config and models
from app.core.config import settings
from app.db.database import Base
# Import all models so that autogenerate can detect them
from app.models import dataset_mapping # noqa
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
def ensure_database_and_user():
"""
Ensure the database and the application user exist.
Connects to MySQL as root and creates the database and the application user.
"""
# Only run when MySQL is configured
if not settings.mysql_host:
return
mysql_root_password = os.getenv('MYSQL_ROOT_PASSWORD', 'Huawei@123')
# URL-encode the password to handle special characters
encoded_password = quote_plus(mysql_root_password)
# Connect as root (without selecting a database)
root_url = f"mysql+pymysql://root:{encoded_password}@{settings.mysql_host}:{settings.mysql_port}/"
try:
root_engine = create_engine(root_url, poolclass=pool.NullPool)
with root_engine.connect() as conn:
# Create the database if it does not exist
conn.execute(text(
f"CREATE DATABASE IF NOT EXISTS `{settings.mysql_database}` "
f"CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci"
))
conn.commit()
# Create the user if it does not exist, using MySQL 8's default caching_sha2_password
conn.execute(text(
f"CREATE USER IF NOT EXISTS '{settings.mysql_user}'@'%' "
f"IDENTIFIED BY '{settings.mysql_password}'"
))
conn.commit()
# Grant privileges
conn.execute(text(
f"GRANT ALL PRIVILEGES ON `{settings.mysql_database}`.* TO '{settings.mysql_user}'@'%'"
))
conn.commit()
# Flush privileges
conn.execute(text("FLUSH PRIVILEGES"))
conn.commit()
root_engine.dispose()
print(f"✓ Database '{settings.mysql_database}' and user '{settings.mysql_user}' are ready")
except Exception as e:
print(f"⚠️ Warning: Could not ensure database and user: {e}")
print(f" This may be expected if database already exists or permissions are set")
# Set the database URL from the application config
config.set_main_option('sqlalchemy.url', settings.sync_database_url)
# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name is not None:
fileConfig(config.config_file_name)
# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
target_metadata = Base.metadata
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
def run_migrations_offline() -> None:
"""Run migrations in 'offline' mode.
This configures the context with just a URL
and not an Engine, though an Engine is acceptable
here as well. By skipping the Engine creation
we don't even need a DBAPI to be available.
Calls to context.execute() here emit the given string to the
script output.
"""
url = config.get_main_option("sqlalchemy.url")
context.configure(
url=url,
target_metadata=target_metadata,
literal_binds=True,
dialect_opts={"paramstyle": "named"},
)
with context.begin_transaction():
context.run_migrations()
def run_migrations_online() -> None:
"""Run migrations in 'online' mode.
In this scenario we need to create an Engine
and associate a connection with the context.
"""
# First make sure the database and user exist
ensure_database_and_user()
connectable = engine_from_config(
config.get_section(config.config_ini_section, {}),
prefix="sqlalchemy.",
poolclass=pool.NullPool,
)
with connectable.connect() as connection:
context.configure(
connection=connection, target_metadata=target_metadata
)
with context.begin_transaction():
context.run_migrations()
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()
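
Alembic invokes this env.py whenever migrations run. For illustration, the standard programmatic way to apply the migrations (the project may simply call the `alembic upgrade head` CLI instead):

# Standard Alembic API usage; shown as an example, not taken from the repository.
from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")   # picks up script_location = %(here)s/alembic
command.upgrade(cfg, "head")  # runs env.py, which first ensures the database and user exist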

View File

@@ -0,0 +1,28 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
"""Upgrade schema."""
${upgrades if upgrades else "pass"}
def downgrade() -> None:
"""Downgrade schema."""
${downgrades if downgrades else "pass"}

View File

@@ -0,0 +1,41 @@
"""Initiation
Revision ID: 755dc1afb8ad
Revises:
Create Date: 2025-10-20 19:34:20.258554
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '755dc1afb8ad'
down_revision: Union[str, Sequence[str], None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
"""Upgrade schema."""
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('mapping',
sa.Column('mapping_id', sa.String(length=36), nullable=False),
sa.Column('source_dataset_id', sa.String(length=36), nullable=False, comment='源数据集ID'),
sa.Column('labelling_project_id', sa.String(length=36), nullable=False, comment='标注项目ID'),
sa.Column('labelling_project_name', sa.String(length=255), nullable=True, comment='标注项目名称'),
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True, comment='创建时间'),
sa.Column('last_updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True, comment='最后更新时间'),
sa.Column('deleted_at', sa.DateTime(timezone=True), nullable=True, comment='删除时间'),
sa.PrimaryKeyConstraint('mapping_id')
)
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade schema."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('mapping')
# ### end Alembic commands ###

View File

@@ -0,0 +1 @@
# app/__init__.py

View File

@@ -0,0 +1,19 @@
"""
API routing module
Centralizes the organization of all API routes
"""
from fastapi import APIRouter
from .system import router as system_router
from .project import project_router
# Create the main API router
api_router = APIRouter()
# Register sub-routers on the main router
api_router.include_router(system_router, tags=["系统"])
api_router.include_router(project_router, prefix="/project", tags=["项目"])
# Exported for use by main.py
__all__ = ["api_router"]
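
The application entry point itself is not included in this diff. A hypothetical sketch of how main.py might mount api_router (the app title and any global prefix are assumptions):

# Hypothetical main.py sketch -- not part of this commit.
from fastapi import FastAPI

from app.api import api_router

app = FastAPI(title="Label Studio Adapter")
app.include_router(api_router)
# e.g. run with: uvicorn app.main:app --host 0.0.0.0 --port 18000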

View File

@@ -0,0 +1,11 @@
"""
API route module for labelling projects
"""
from fastapi import APIRouter
project_router = APIRouter()
from . import create
from . import sync
from . import list
from . import delete

View File

@@ -0,0 +1,130 @@
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.ext.asyncio import AsyncSession
from typing import Optional
from app.db.database import get_db
from app.services.dataset_mapping_service import DatasetMappingService
from app.clients import get_clients
from app.schemas.dataset_mapping import (
DatasetMappingCreateRequest,
DatasetMappingCreateResponse,
)
from app.schemas import StandardResponse
from app.core.logging import get_logger
from app.core.config import settings
from . import project_router
logger = get_logger(__name__)
@project_router.post("/create", response_model=StandardResponse[DatasetMappingCreateResponse], status_code=201)
async def create_dataset_mapping(
request: DatasetMappingCreateRequest,
db: AsyncSession = Depends(get_db)
):
"""
Create a dataset mapping.
For the given dataset in the DM service, create a corresponding project in Label Studio,
record the association in the database, and return the Label Studio project ID.
Note: a single dataset may have multiple labelling projects.
"""
try:
# Get the global client instances
dm_client_instance, ls_client_instance = get_clients()
service = DatasetMappingService(db)
logger.info(f"Create dataset mapping request: {request.source_dataset_id}")
# 从DM服务获取数据集信息
dataset_info = await dm_client_instance.get_dataset(request.source_dataset_id)
if not dataset_info:
raise HTTPException(
status_code=404,
detail=f"Dataset not found in DM service: {request.source_dataset_id}"
)
# Determine the data type from the dataset type
data_type = "image"  # default
if dataset_info.type and dataset_info.type.code:
type_code = dataset_info.type.code.lower()
if "audio" in type_code:
data_type = "audio"
elif "video" in type_code:
data_type = "video"
elif "text" in type_code:
data_type = "text"
# Derive the project name
project_name = f"{dataset_info.name}"
# Create the project in Label Studio
project_data = await ls_client_instance.create_project(
title=project_name,
description=dataset_info.description or f"Imported from DM dataset {dataset_info.id}",
data_type=data_type
)
if not project_data:
raise HTTPException(
status_code=500,
detail="Fail to create Label Studio project."
)
project_id = project_data["id"]
# Configure local storage: dataset/<id>
dataset_storage_path = f"{settings.label_studio_local_storage_dataset_base_path}/{request.source_dataset_id}"
dataset_storage_result = await ls_client_instance.create_local_storage(
project_id=project_id,
path=dataset_storage_path,
title="Dataset_BLOB",
use_blob_urls=True,
description=f"Local storage for dataset {dataset_info.name}"
)
# Configure local storage: upload
upload_storage_path = f"{settings.label_studio_local_storage_upload_base_path}"
upload_storage_result = await ls_client_instance.create_local_storage(
project_id=project_id,
path=upload_storage_path,
title="Upload_BLOB",
use_blob_urls=True,
description=f"Local storage for dataset {dataset_info.name}"
)
if not dataset_storage_result or not upload_storage_result:
# Local storage configuration failed for at least one path; warn but do not abort the flow
logger.warning(f"Failed to configure local storage for project {project_id}")
else:
logger.info(f"Local storage configured for project {project_id}: {dataset_storage_path}, {upload_storage_path}")
# Create the mapping record, including the project name
mapping = await service.create_mapping(
request,
str(project_id),
project_name
)
logger.debug(
f"Dataset mapping created: {mapping.mapping_id} -> S {mapping.source_dataset_id} <> L {mapping.labelling_project_id}"
)
response_data = DatasetMappingCreateResponse(
mapping_id=mapping.mapping_id,
labelling_project_id=mapping.labelling_project_id,
labelling_project_name=mapping.labelling_project_name or project_name,
message="Dataset mapping created successfully"
)
return StandardResponse(
code=201,
message="success",
data=response_data
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error while creating dataset mapping: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
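For reference, a minimal client-side sketch of calling this endpoint, assuming the adapter listens on localhost:8000 and the router is mounted under /api/project as in main.py; the dataset ID is a placeholder:

import httpx

# POST /api/project/create with a source dataset ID known to the DM service
resp = httpx.post(
    "http://localhost:8000/api/project/create",
    json={"source_dataset_id": "00000000-0000-0000-0000-000000000000"},
)
body = resp.json()
# StandardResponse envelope: {"code": 201, "message": "success", "data": {...}}
print(body["data"]["labelling_project_id"], body["data"]["labelling_project_name"])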

View File

@@ -0,0 +1,106 @@
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.ext.asyncio import AsyncSession
from typing import Optional
from app.db.database import get_db
from app.services.dataset_mapping_service import DatasetMappingService
from app.clients import get_clients
from app.schemas.dataset_mapping import DeleteDatasetResponse
from app.schemas import StandardResponse
from app.core.logging import get_logger
from . import project_router
logger = get_logger(__name__)
@project_router.delete("/mappings", response_model=StandardResponse[DeleteDatasetResponse])
async def delete_mapping(
m: Optional[str] = Query(None, description="映射UUID"),
proj: Optional[str] = Query(None, description="Label Studio项目ID"),
db: AsyncSession = Depends(get_db)
):
"""
Delete a mapping and its corresponding Label Studio project.
The mapping to delete can be specified in any of the following ways:
- m: mapping UUID
- proj: Label Studio project ID
- both (m takes precedence)
This operation will:
1. Delete the project in Label Studio
2. Soft-delete the mapping record in the database
"""
try:
# At least one parameter must be provided
if not m and not proj:
raise HTTPException(
status_code=400,
detail="Either 'm' (mapping UUID) or 'proj' (project ID) must be provided"
)
# Get the global client instances
dm_client_instance, ls_client_instance = get_clients()
service = DatasetMappingService(db)
mapping = None
# Prefer lookup by mapping UUID
if m:
logger.info(f"Deleting by mapping UUID: {m}")
mapping = await service.get_mapping_by_uuid(m)
# Fall back to lookup by Label Studio project ID when m is not provided
elif proj:
logger.info(f"Deleting by project ID: {proj}")
mapping = await service.get_mapping_by_labelling_project_id(proj)
if not mapping:
raise HTTPException(
status_code=404,
detail="Mapping not found"
)
mapping_id = mapping.mapping_id
labelling_project_id = mapping.labelling_project_id
labelling_project_name = mapping.labelling_project_name
logger.info(f"Found mapping: {mapping_id}, Label Studio project ID: {labelling_project_id}")
# 1. Delete the Label Studio project
try:
delete_success = await ls_client_instance.delete_project(int(labelling_project_id))
if delete_success:
logger.info(f"Successfully deleted Label Studio project: {labelling_project_id}")
else:
logger.warning(f"Failed to delete Label Studio project or project not found: {labelling_project_id}")
except Exception as e:
logger.error(f"Error deleting Label Studio project: {e}")
# Continue: the mapping record should still be removed even if the Label Studio project deletion failed
# 2. Soft-delete the mapping record
soft_delete_success = await service.soft_delete_mapping(mapping_id)
if not soft_delete_success:
raise HTTPException(
status_code=500,
detail="Failed to delete mapping record"
)
logger.info(f"Successfully deleted mapping: {mapping_id}")
response_data = DeleteDatasetResponse(
mapping_id=mapping_id,
status="success",
message=f"Successfully deleted mapping and Label Studio project '{labelling_project_name}'"
)
return StandardResponse(
code=200,
message="success",
data=response_data
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error deleting mapping: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
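A matching client-side sketch for deletion, under the same local-adapter assumption and with placeholder IDs; either the m or the proj query parameter selects the mapping:

import httpx

# Delete by mapping UUID; params={"proj": "<label-studio-project-id>"} works the same way
resp = httpx.delete(
    "http://localhost:8000/api/project/mappings",
    params={"m": "00000000-0000-0000-0000-000000000000"},
)
print(resp.json()["data"]["status"])  # "success" once the project and mapping are gone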

View File

@@ -0,0 +1,110 @@
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.ext.asyncio import AsyncSession
from typing import List
from app.db.database import get_db
from app.services.dataset_mapping_service import DatasetMappingService
from app.schemas.dataset_mapping import DatasetMappingResponse
from app.schemas import StandardResponse
from app.core.logging import get_logger
from . import project_router
logger = get_logger(__name__)
@project_router.get("/mappings/list", response_model=StandardResponse[List[DatasetMappingResponse]])
async def list_mappings(
skip: int = Query(0, ge=0, description="Number of records to skip"),
limit: int = Query(100, ge=1, le=1000, description="Maximum number of records to return"),
db: AsyncSession = Depends(get_db)
):
"""
List all mapping relationships.
Returns all active dataset mappings (those not soft-deleted).
"""
try:
service = DatasetMappingService(db)
logger.info(f"Listing mappings, skip={skip}, limit={limit}")
mappings = await service.get_all_mappings(skip=skip, limit=limit)
logger.info(f"Found {len(mappings)} mappings")
return StandardResponse(
code=200,
message="success",
data=mappings
)
except Exception as e:
logger.error(f"Error listing mappings: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
@project_router.get("/mappings/{mapping_id}", response_model=StandardResponse[DatasetMappingResponse])
async def get_mapping(
mapping_id: str,
db: AsyncSession = Depends(get_db)
):
"""
Get a single mapping by its UUID.
"""
try:
service = DatasetMappingService(db)
logger.info(f"Get mapping: {mapping_id}")
mapping = await service.get_mapping_by_uuid(mapping_id)
if not mapping:
raise HTTPException(
status_code=404,
detail=f"Mapping not found: {mapping_id}"
)
logger.info(f"Found mapping: {mapping.mapping_id}")
return StandardResponse(
code=200,
message="success",
data=mapping
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error getting mapping: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
@project_router.get("/mappings/by-source/{source_dataset_id}", response_model=StandardResponse[List[DatasetMappingResponse]])
async def get_mappings_by_source(
source_dataset_id: str,
db: AsyncSession = Depends(get_db)
):
"""
List all mappings for a source dataset ID.
Returns the labelling projects created from this dataset (soft-deleted mappings are excluded).
"""
try:
service = DatasetMappingService(db)
logger.info(f"Get mappings by source dataset id: {source_dataset_id}")
mappings = await service.get_mappings_by_source_dataset_id(source_dataset_id)
logger.info(f"Found {len(mappings)} mappings")
return StandardResponse(
code=200,
message="success",
data=mappings
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error getting mappings: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
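The three read endpoints above can be exercised like this (a sketch with the same local-adapter assumption):

import httpx

base = "http://localhost:8000/api/project"
# Page through active mappings, 100 at a time
mappings = httpx.get(f"{base}/mappings/list", params={"skip": 0, "limit": 100}).json()["data"]
for m in mappings:
    print(m["mapping_id"], "->", m["labelling_project_id"])
if mappings:
    # Look up a single mapping by UUID, then all mappings for its source dataset
    one = httpx.get(f"{base}/mappings/{mappings[0]['mapping_id']}").json()["data"]
    by_source = httpx.get(f"{base}/mappings/by-source/{one['source_dataset_id']}").json()["data"]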

View File

@@ -0,0 +1,68 @@
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.ext.asyncio import AsyncSession
from typing import List, Optional
from app.db.database import get_db
from app.services.dataset_mapping_service import DatasetMappingService
from app.services.sync_service import SyncService
from app.clients import get_clients
from app.exceptions import NoDatasetInfoFoundError, DatasetMappingNotFoundError
from app.schemas.dataset_mapping import (
DatasetMappingResponse,
SyncDatasetRequest,
SyncDatasetResponse,
)
from app.schemas import StandardResponse
from app.core.logging import get_logger
from . import project_router
logger = get_logger(__name__)
@project_router.post("/sync", response_model=StandardResponse[SyncDatasetResponse])
async def sync_dataset_content(
request: SyncDatasetRequest,
db: AsyncSession = Depends(get_db)
):
"""
Sync dataset content.
For the given mapping ID, sync files from the DM dataset into the Label Studio project,
record the update time in the database, and return the sync status.
"""
try:
dm_client_instance, ls_client_instance = get_clients()
mapping_service = DatasetMappingService(db)
sync_service = SyncService(dm_client_instance, ls_client_instance, mapping_service)
logger.info(f"Sync dataset content request: mapping_id={request.mapping_id}")
# Look up the mapping by mapping_id
mapping = await mapping_service.get_mapping_by_uuid(request.mapping_id)
if not mapping:
raise HTTPException(
status_code=404,
detail=f"Mapping not found: {request.mapping_id}"
)
# Run the sync (uses the source dataset UUID from the mapping)
result = await sync_service.sync_dataset_files(request.mapping_id, request.batch_size)
logger.info(f"Sync completed: {result.synced_files}/{result.total_files} files")
return StandardResponse(
code=200,
message="success",
data=result
)
except HTTPException:
raise
except NoDatasetInfoFoundError as e:
logger.error(f"Failed to get dataset info: {e}")
raise HTTPException(status_code=404, detail=str(e))
except DatasetMappingNotFoundError as e:
logger.error(f"Mapping not found: {e}")
raise HTTPException(status_code=404, detail=str(e))
except Exception as e:
logger.error(f"Error syncing dataset content: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
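A client-side sketch of triggering a sync (same local-adapter assumption; the mapping ID is a placeholder, and batch_size is capped at 100 by SyncDatasetRequest):

import httpx

resp = httpx.post(
    "http://localhost:8000/api/project/sync",
    json={"mapping_id": "00000000-0000-0000-0000-000000000000", "batch_size": 50},
    timeout=300.0,  # large datasets take a while to page through
)
data = resp.json()["data"]
print(f"{data['synced_files']}/{data['total_files']} files synced: {data['message']}")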

View File

@@ -0,0 +1,34 @@
from fastapi import APIRouter
from typing import Dict, Any
from app.core.config import settings
from app.schemas import StandardResponse
router = APIRouter()
@router.get("/health", response_model=StandardResponse[Dict[str, Any]])
async def health_check():
"""Health check endpoint"""
return StandardResponse(
code=200,
message="success",
data={
"status": "healthy",
"service": "Label Studio Adapter",
"version": settings.app_version
}
)
@router.get("/config", response_model=StandardResponse[Dict[str, Any]])
async def get_config():
"""Return configuration information"""
return StandardResponse(
code=200,
message="success",
data={
"app_name": settings.app_name,
"version": settings.app_version,
"dm_service_url": settings.dm_service_base_url,
"label_studio_url": settings.label_studio_base_url,
"debug": settings.debug
}
)
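Both system endpoints sit under the /api prefix added in main.py; a quick probe sketch (local-adapter assumption):

import httpx

health = httpx.get("http://localhost:8000/api/health").json()
config = httpx.get("http://localhost:8000/api/config").json()
print(health["data"]["status"], config["data"]["label_studio_url"])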

View File

@@ -0,0 +1,8 @@
# app/clients/__init__.py
from .dm_client import DMServiceClient
from .label_studio_client import LabelStudioClient
from .client_manager import get_clients, set_clients, get_dm_client, get_ls_client
__all__ = ["DMServiceClient", "LabelStudioClient", "get_clients", "set_clients", "get_dm_client", "get_ls_client"]

View File

@@ -0,0 +1,34 @@
from typing import Optional
from fastapi import HTTPException
from .dm_client import DMServiceClient
from .label_studio_client import LabelStudioClient
# Global client instances (initialized in main.py)
dm_client: Optional[DMServiceClient] = None
ls_client: Optional[LabelStudioClient] = None
def get_clients() -> tuple[DMServiceClient, LabelStudioClient]:
"""Return both global client instances"""
global dm_client, ls_client
if not dm_client or not ls_client:
raise HTTPException(status_code=500, detail="Clients are not initialized")
return dm_client, ls_client
def set_clients(dm_client_instance: DMServiceClient, ls_client_instance: LabelStudioClient) -> None:
"""Set the global client instances"""
global dm_client, ls_client
dm_client = dm_client_instance
ls_client = ls_client_instance
def get_dm_client() -> DMServiceClient:
"""Return the DM service client"""
if not dm_client:
raise HTTPException(status_code=500, detail="DM client is not initialized")
return dm_client
def get_ls_client() -> LabelStudioClient:
"""Return the Label Studio client"""
if not ls_client:
raise HTTPException(status_code=500, detail="Label Studio client is not initialized")
return ls_client
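A rough sketch of how these helpers are wired, mirroring the lifespan hook in main.py (the token value is a placeholder):

from app.clients import DMServiceClient, LabelStudioClient, set_clients, get_clients

# At startup: construct the clients once and register them globally
set_clients(
    DMServiceClient(),
    LabelStudioClient(base_url="http://label-studio:8080", token="<api-token>"),
)
# Anywhere else (e.g. route handlers): fetch the shared instances; raises HTTP 500 if unset
dm, ls = get_clients()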

View File

@@ -0,0 +1,138 @@
import httpx
from typing import Optional
from app.core.config import settings
from app.core.logging import get_logger
from app.schemas.dm_service import DatasetResponse, PagedDatasetFileResponse
logger = get_logger(__name__)
class DMServiceClient:
"""Client for the Data Management (DM) service"""
def __init__(self, base_url: str|None = None, timeout: float = 30.0):
self.base_url = base_url or settings.dm_service_base_url
self.timeout = timeout
self.client = httpx.AsyncClient(
base_url=self.base_url,
timeout=self.timeout
)
logger.info(f"Initialize DM service client, base url: {self.base_url}")
@staticmethod
def _unwrap_payload(data):
"""Unwrap common envelope shapes like {'code': ..., 'message': ..., 'data': {...}}."""
if isinstance(data, dict) and 'data' in data and isinstance(data['data'], (dict, list)):
return data['data']
return data
@staticmethod
def _is_error_payload(data) -> bool:
"""Detect error-shaped payloads returned with HTTP 200."""
if not isinstance(data, dict):
return False
# Common patterns: {error, message, ...} or {code, message, ...} without data
if 'error' in data and 'message' in data:
return True
if 'code' in data and 'message' in data and 'data' not in data:
return True
return False
@staticmethod
def _keys(d):
return list(d.keys()) if isinstance(d, dict) else []
async def get_dataset(self, dataset_id: str) -> Optional[DatasetResponse]:
"""Fetch dataset details"""
raw = None  # keep a reference for the unexpected-error log below
try:
logger.info(f"Getting dataset detail: {dataset_id} ...")
response = await self.client.get(f"/data-management/datasets/{dataset_id}")
response.raise_for_status()
raw = response.json()
data = self._unwrap_payload(raw)
if self._is_error_payload(data):
logger.error(f"DM service returned error for dataset {dataset_id}: {data}")
return None
if not isinstance(data, dict):
logger.error(f"Unexpected dataset payload type for {dataset_id}: {type(data).__name__}")
return None
required = ["id", "name", "description", "datasetType", "status", "fileCount", "totalSize"]
if not all(k in data for k in required):
logger.error(f"Dataset payload missing required fields for {dataset_id}. Keys: {self._keys(data)}")
return None
return DatasetResponse(**data)
except httpx.HTTPError as e:
logger.error(f"Failed to get dataset {dataset_id}: {e}")
return None
except Exception as e:
logger.error(f"[Unexpected] [GET] dataset {dataset_id}: \n{e}\nRaw JSON received: \n{raw}")
return None
async def get_dataset_files(
self,
dataset_id: str,
page: int = 0,
size: int = 100,
file_type: Optional[str] = None,
status: Optional[str] = None
) -> Optional[PagedDatasetFileResponse]:
"""Fetch the dataset file list (paged)"""
raw = None  # keep a reference for the unexpected-error log below
try:
logger.info(f"Get dataset files: dataset={dataset_id}, page={page}, size={size}")
params: dict = {"page": page, "size": size}
if file_type:
params["fileType"] = file_type
if status:
params["status"] = status
response = await self.client.get(
f"/data-management/datasets/{dataset_id}/files",
params=params
)
response.raise_for_status()
raw = response.json()
data = self._unwrap_payload(raw)
if self._is_error_payload(data):
logger.error(f"DM service returned error for dataset files {dataset_id}: {data}")
return None
if not isinstance(data, dict):
logger.error(f"Unexpected dataset files payload type for {dataset_id}: {type(data).__name__}")
return None
required = ["content", "totalElements", "totalPages", "page", "size"]
if not all(k in data for k in required):
logger.error(f"Files payload missing required fields for {dataset_id}. Keys: {self._keys(data)}")
return None
return PagedDatasetFileResponse(**data)
except httpx.HTTPError as e:
logger.error(f"Failed to get dataset files for {dataset_id}: {e}")
return None
except Exception as e:
logger.error(f"[Unexpected] [GET] dataset files {dataset_id}: \n{e}\nRaw JSON received: \n{raw}")
return None
async def download_file(self, dataset_id: str, file_id: str) -> Optional[bytes]:
"""Download the file content"""
try:
logger.info(f"Download file: dataset={dataset_id}, file={file_id}")
response = await self.client.get(
f"/data-management/datasets/{dataset_id}/files/{file_id}/download"
)
response.raise_for_status()
return response.content
except httpx.HTTPError as e:
logger.error(f"Failed to download file {file_id}: {e}")
return None
except Exception as e:
logger.error(f"Unexpected error while downloading file {file_id}: {e}")
return None
async def get_file_download_url(self, dataset_id: str, file_id: str) -> str:
"""Build the file download URL"""
return f"{self.base_url}/data-management/datasets/{dataset_id}/files/{file_id}/download"
async def close(self):
"""Close the client connection"""
await self.client.aclose()
logger.info("DM service client connection closed")
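A small illustration of the envelope handling above; both helpers are static, so no running DM service is needed:

from app.clients.dm_client import DMServiceClient

# Enveloped payloads are unwrapped to their inner 'data' member ...
wrapped = {"code": 200, "message": "success", "data": {"id": "ds-1", "name": "demo"}}
assert DMServiceClient._unwrap_payload(wrapped) == {"id": "ds-1", "name": "demo"}
# ... while error-shaped bodies returned with HTTP 200 are still detected
assert DMServiceClient._is_error_payload({"code": 404, "message": "not found"})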

View File

@@ -0,0 +1,469 @@
import httpx
from typing import Optional, Dict, Any, List
import json
from app.core.config import settings
from app.core.logging import get_logger
from app.schemas.label_studio import (
LabelStudioProject,
LabelStudioCreateProjectRequest,
LabelStudioCreateTaskRequest
)
logger = get_logger(__name__)
class LabelStudioClient:
"""Label Studio service client
Talks to Label Studio directly over its HTTP REST API.
Authentication: requests carry an `Authorization: Token {token}` header.
"""
# Default label config templates
DEFAULT_LABEL_CONFIGS = {
"image": """
<View>
<Image name="image" value="$image"/>
<RectangleLabels name="label" toName="image">
<Label value="Object" background="red"/>
</RectangleLabels>
</View>
""",
"text": """
<View>
<Text name="text" value="$text"/>
<Choices name="sentiment" toName="text">
<Choice value="positive"/>
<Choice value="negative"/>
<Choice value="neutral"/>
</Choices>
</View>
""",
"audio": """
<View>
<Audio name="audio" value="$audio"/>
<AudioRegionLabels name="label" toName="audio">
<Label value="Speech" background="red"/>
<Label value="Noise" background="blue"/>
</AudioRegionLabels>
</View>
""",
"video": """
<View>
<Video name="video" value="$video"/>
<VideoRegionLabels name="label" toName="video">
<Label value="Action" background="red"/>
</VideoRegionLabels>
</View>
"""
}
def __init__(
self,
base_url: Optional[str] = None,
token: Optional[str] = None,
timeout: float = 30.0
):
"""Initialize the Label Studio client
Args:
base_url: Label Studio service address
token: API token (sent as `Authorization: Token {token}`)
timeout: request timeout in seconds
"""
self.base_url = (base_url or settings.label_studio_base_url).rstrip("/")
self.token = token or settings.label_studio_user_token
self.timeout = timeout
if not self.token:
raise ValueError("Label Studio API token is required")
# Initialize the HTTP client
self.client = httpx.AsyncClient(
base_url=self.base_url,
timeout=self.timeout,
headers={
"Authorization": f"Token {self.token}",
"Content-Type": "application/json"
}
)
logger.info(f"Label Studio client initialized: {self.base_url}")
def get_label_config_by_type(self, data_type: str) -> str:
"""根据数据类型获取标注配置"""
return self.DEFAULT_LABEL_CONFIGS.get(data_type.lower(), self.DEFAULT_LABEL_CONFIGS["image"])
async def create_project(
self,
title: str,
description: str = "",
label_config: Optional[str] = None,
data_type: str = "image"
) -> Optional[Dict[str, Any]]:
"""创建Label Studio项目"""
try:
logger.info(f"Creating Label Studio project: {title}")
if not label_config:
label_config = self.get_label_config_by_type(data_type)
project_data = {
"title": title,
"description": description,
"label_config": label_config.strip()
}
response = await self.client.post("/api/projects", json=project_data)
response.raise_for_status()
project = response.json()
project_id = project.get("id")
if not project_id:
raise Exception("Label Studio response does not contain project ID")
logger.info(f"Project created successfully, ID: {project_id}")
return project
except httpx.HTTPStatusError as e:
logger.error(f"Create project failed HTTP {e.response.status_code}: {e.response.text}")
return None
except Exception as e:
logger.error(f"Error while creating Label Studio project: {e}")
return None
async def import_tasks(
self,
project_id: int,
tasks: List[Dict[str, Any]],
commit_to_project: bool = True,
return_task_ids: bool = True
) -> Optional[Dict[str, Any]]:
"""批量导入任务到Label Studio项目"""
try:
logger.info(f"Importing {len(tasks)} tasks into project {project_id}")
response = await self.client.post(
f"/api/projects/{project_id}/import",
json=tasks,
params={
"commit_to_project": str(commit_to_project).lower(),
"return_task_ids": str(return_task_ids).lower()
}
)
response.raise_for_status()
result = response.json()
task_count = result.get("task_count", len(tasks))
logger.info(f"Tasks imported successfully: {task_count}")
return result
except httpx.HTTPStatusError as e:
logger.error(f"Import tasks failed HTTP {e.response.status_code}: {e.response.text}")
return None
except Exception as e:
logger.error(f"Error while importing tasks: {e}")
return None
async def create_tasks_batch(
self,
project_id: str,
tasks: List[Dict[str, Any]]
) -> Optional[Dict[str, Any]]:
"""批量创建任务的便利方法"""
try:
pid = int(project_id)
return await self.import_tasks(pid, tasks)
except ValueError as e:
logger.error(f"Invalid project ID format: {project_id}, error: {e}")
return None
except Exception as e:
logger.error(f"Error while creating tasks in batch: {e}")
return None
async def create_task(
self,
project_id: str,
data: Dict[str, Any],
meta: Optional[Dict[str, Any]] = None
) -> Optional[Dict[str, Any]]:
"""创建单个任务"""
try:
task = {"data": data}
if meta:
task["meta"] = meta
return await self.create_tasks_batch(project_id, [task])
except Exception as e:
logger.error(f"Error while creating single task: {e}")
return None
async def get_project_tasks(
self,
project_id: str,
page: Optional[int] = None,
page_size: int = 1000
) -> Optional[Dict[str, Any]]:
"""Fetch task information for a project
Args:
project_id: project ID
page: page number (1-based); if None, all tasks are fetched
page_size: page size
Returns:
If page is given, a dict with pagination info:
{
"count": total number of tasks,
"page": current page number,
"page_size": page size,
"project_id": project ID,
"tasks": tasks on the current page
}
If page is None, a dict with all tasks:
{
"count": total number of tasks,
"project_id": project ID,
"tasks": all tasks
}
"""
try:
pid = int(project_id)
# 如果指定了page,直接获取单页任务
if page is not None:
logger.info(f"Fetching tasks for project {pid}, page {page} (page_size={page_size})")
response = await self.client.get(
f"/api/projects/{pid}/tasks",
params={
"page": page,
"page_size": page_size
}
)
response.raise_for_status()
result = response.json()
# 返回单页结果,包含分页信息
return {
"count": result.get("total", len(result.get("tasks", []))),
"page": page,
"page_size": page_size,
"project_id": pid,
"tasks": result.get("tasks", [])
}
# 如果未指定page,获取所有任务
logger.info(f"Start fetching all tasks for project {pid} (page_size={page_size})")
all_tasks = []
current_page = 1
while True:
try:
response = await self.client.get(
f"/api/projects/{pid}/tasks",
params={
"page": current_page,
"page_size": page_size
}
)
response.raise_for_status()
result = response.json()
tasks = result.get("tasks", [])
if not tasks:
logger.debug(f"No more tasks on page {current_page}")
break
all_tasks.extend(tasks)
logger.debug(f"Fetched page {current_page}, {len(tasks)} tasks")
# 检查是否还有更多页
total = result.get("total", 0)
if len(all_tasks) >= total:
break
current_page += 1
except httpx.HTTPStatusError as e:
if e.response.status_code == 404:
# 超出页数范围,结束分页
logger.debug(f"Reached last page (page {current_page})")
break
else:
raise
logger.info(f"Fetched all tasks for project {pid}, total {len(all_tasks)}")
# 返回所有任务,不包含分页信息
return {
"count": len(all_tasks),
"project_id": pid,
"tasks": all_tasks
}
except httpx.HTTPStatusError as e:
logger.error(f"Get project tasks failed HTTP {e.response.status_code}: {e.response.text}")
return None
except Exception as e:
logger.error(f"Error while getting project tasks: {e}")
return None
async def delete_task(
self,
task_id: int
) -> bool:
"""删除单个任务"""
try:
logger.info(f"Deleting task: {task_id}")
response = await self.client.delete(f"/api/tasks/{task_id}")
response.raise_for_status()
logger.info(f"Task deleted: {task_id}")
return True
except httpx.HTTPStatusError as e:
logger.error(f"Delete task {task_id} failed HTTP {e.response.status_code}: {e.response.text}")
return False
except Exception as e:
logger.error(f"Error while deleting task {task_id}: {e}")
return False
async def delete_tasks_batch(
self,
task_ids: List[int]
) -> Dict[str, int]:
"""批量删除任务"""
try:
logger.info(f"Deleting {len(task_ids)} tasks in batch")
successful_deletions = 0
failed_deletions = 0
for task_id in task_ids:
if await self.delete_task(task_id):
successful_deletions += 1
else:
failed_deletions += 1
logger.info(f"Batch deletion finished: success {successful_deletions}, failed {failed_deletions}")
return {
"successful": successful_deletions,
"failed": failed_deletions,
"total": len(task_ids)
}
except Exception as e:
logger.error(f"Error while deleting tasks in batch: {e}")
return {
"successful": 0,
"failed": len(task_ids),
"total": len(task_ids)
}
async def get_project(self, project_id: int) -> Optional[Dict[str, Any]]:
"""获取项目信息"""
try:
logger.info(f"Fetching project info: {project_id}")
response = await self.client.get(f"/api/projects/{project_id}")
response.raise_for_status()
return response.json()
except httpx.HTTPStatusError as e:
logger.error(f"Get project info failed HTTP {e.response.status_code}: {e.response.text}")
return None
except Exception as e:
logger.error(f"Error while getting project info: {e}")
return None
async def delete_project(self, project_id: int) -> bool:
"""删除项目"""
try:
logger.info(f"Deleting project: {project_id}")
response = await self.client.delete(f"/api/projects/{project_id}")
response.raise_for_status()
logger.info(f"Project deleted: {project_id}")
return True
except httpx.HTTPStatusError as e:
logger.error(f"Delete project {project_id} failed HTTP {e.response.status_code}: {e.response.text}")
return False
except Exception as e:
logger.error(f"Error while deleting project {project_id}: {e}")
return False
async def create_local_storage(
self,
project_id: int,
path: str,
title: str,
use_blob_urls: bool = True,
regex_filter: Optional[str] = None,
description: Optional[str] = None
) -> Optional[Dict[str, Any]]:
"""Create a local file storage configuration
Args:
project_id: Label Studio project ID
path: local file path (as seen inside the Label Studio container)
title: storage configuration title
use_blob_urls: whether to use blob URLs (True is recommended)
regex_filter: file filter regex (optional)
description: storage description (optional)
Returns:
The created storage configuration, or None on failure
"""
try:
logger.info(f"Creating local storage for project {project_id}: {path}")
storage_data = {
"project": project_id,
"path": path,
"title": title,
"use_blob_urls": use_blob_urls
}
if regex_filter:
storage_data["regex_filter"] = regex_filter
if description:
storage_data["description"] = description
response = await self.client.post(
"/api/storages/localfiles/",
json=storage_data
)
response.raise_for_status()
storage = response.json()
storage_id = storage.get("id")
logger.info(f"Local storage created successfully, ID: {storage_id}")
return storage
except httpx.HTTPStatusError as e:
logger.error(f"Create local storage failed HTTP {e.response.status_code}: {e.response.text}")
return None
except Exception as e:
logger.error(f"Error while creating local storage: {e}")
return None
async def close(self):
"""关闭客户端连接"""
try:
await self.client.aclose()
logger.info("Label Studio client closed")
except Exception as e:
logger.error(f"Error while closing Label Studio client: {e}")
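An end-to-end sketch of the client above, assuming a reachable Label Studio instance and a valid API token; the file URL is a placeholder in the /data/local-files/?d= form used elsewhere in this adapter:

import asyncio
from app.clients.label_studio_client import LabelStudioClient

async def demo() -> None:
    ls = LabelStudioClient(base_url="http://label-studio:8080", token="<api-token>")
    try:
        project = await ls.create_project(title="Demo project", data_type="image")
        if project:
            # Each task carries the data payload plus meta used later for de-duplication
            await ls.import_tasks(project["id"], [
                {"data": {"image": "/data/local-files/?d=dataset/demo/cat.jpg"},
                 "meta": {"dm_file_id": "file-1"}},
            ])
    finally:
        await ls.close()

asyncio.run(demo())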

View File

@@ -0,0 +1 @@
# app/core/__init__.py

View File

@@ -0,0 +1,146 @@
from pydantic_settings import BaseSettings
from typing import Optional
import os
from pathlib import Path
class Settings(BaseSettings):
"""Application settings"""
class Config:
env_file = ".env"
case_sensitive = False
extra = 'ignore'  # allow extra fields (e.g. env vars used only by shell scripts)
# =========================
# Adapter 服务配置
# =========================
app_name: str = "Label Studio Adapter"
app_version: str = "1.0.0"
app_description: str = "Adapter for integrating Data Management System with Label Studio"
debug: bool = True
# 服务器配置
host: str = "0.0.0.0"
port: int = 8000
# CORS配置
allowed_origins: list = ["*"]
allowed_methods: list = ["*"]
allowed_headers: list = ["*"]
# MySQL数据库配置 (优先级1)
mysql_host: Optional[str] = None
mysql_port: int = 3306
mysql_user: Optional[str] = None
mysql_password: Optional[str] = None
mysql_database: Optional[str] = None
# PostgreSQL数据库配置 (优先级2)
postgres_host: Optional[str] = None
postgres_port: int = 5432
postgres_user: Optional[str] = None
postgres_password: Optional[str] = None
postgres_database: Optional[str] = None
# SQLite数据库配置 (优先级3 - 兜底)
sqlite_path: str = "data/labelstudio_adapter.db"
# 直接数据库URL配置(如果提供,将覆盖上述配置)
database_url: Optional[str] = None
# 日志配置
log_level: str = "INFO"
# 安全配置
secret_key: str = "your-secret-key-change-this-in-production"
access_token_expire_minutes: int = 30
# =========================
# Label Studio 服务配置
# =========================
label_studio_base_url: str = "http://label-studio:8080"
label_studio_username: Optional[str] = None # Label Studio 用户名(用于登录)
label_studio_password: Optional[str] = None # Label Studio 密码(用于登录)
label_studio_user_token: Optional[str] = None # Legacy Token
label_studio_local_storage_dataset_base_path: str = "/label-studio/local_files/dataset" # Label Studio容器中的本地存储基础路径
label_studio_local_storage_upload_base_path: str = "/label-studio/local_files/upload" # Label Studio容器中的本地存储基础路径
label_studio_file_path_prefix: str = "/data/local-files/?d=" # Label Studio本地文件服务路径前缀
ls_task_page_size: int = 1000
# =========================
# Data Management 服务配置
# =========================
dm_service_base_url: str = "http://data-engine"
dm_file_path_prefix: str = "/" # DM存储文件夹前缀
@property
def computed_database_url(self) -> str:
"""
Select the database connection URL by priority.
Priority: MySQL > PostgreSQL > SQLite3
"""
# 如果直接提供了database_url,优先使用
if self.database_url:
return self.database_url
# 优先级1: MySQL
if all([self.mysql_host, self.mysql_user, self.mysql_password, self.mysql_database]):
return f"mysql+aiomysql://{self.mysql_user}:{self.mysql_password}@{self.mysql_host}:{self.mysql_port}/{self.mysql_database}"
# 优先级2: PostgreSQL
if all([self.postgres_host, self.postgres_user, self.postgres_password, self.postgres_database]):
return f"postgresql+asyncpg://{self.postgres_user}:{self.postgres_password}@{self.postgres_host}:{self.postgres_port}/{self.postgres_database}"
# 优先级3: SQLite (兜底)
sqlite_full_path = Path(self.sqlite_path).absolute()
# 确保目录存在
sqlite_full_path.parent.mkdir(parents=True, exist_ok=True)
return f"sqlite+aiosqlite:///{sqlite_full_path}"
@property
def sync_database_url(self) -> str:
"""
Synchronous connection URL for database migrations.
Replaces the async driver with its sync counterpart.
"""
async_url = self.computed_database_url
# 替换异步驱动为同步驱动
sync_replacements = {
"mysql+aiomysql://": "mysql+pymysql://",
"postgresql+asyncpg://": "postgresql+psycopg2://",
"sqlite+aiosqlite:///": "sqlite:///"
}
for async_driver, sync_driver in sync_replacements.items():
if async_url.startswith(async_driver):
return async_url.replace(async_driver, sync_driver)
return async_url
def get_database_info(self) -> dict:
"""Return database configuration info"""
url = self.computed_database_url
if url.startswith("mysql"):
db_type = "MySQL"
elif url.startswith("postgresql"):
db_type = "PostgreSQL"
elif url.startswith("sqlite"):
db_type = "SQLite"
else:
db_type = "Unknown"
return {
"type": db_type,
"url": url,
"sync_url": self.sync_database_url
}
# Global settings instance
settings = Settings()
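A quick sketch of the selection order (environment variable names follow the field names above since case_sensitive is False; all values are placeholders):

import os
from app.core.config import Settings

# With a complete MySQL configuration, MySQL wins ...
os.environ.update({"MYSQL_HOST": "db", "MYSQL_USER": "u",
                   "MYSQL_PASSWORD": "p", "MYSQL_DATABASE": "adapter"})
print(Settings().computed_database_url)  # mysql+aiomysql://u:p@db:3306/adapter
# ... and with no MySQL/PostgreSQL settings the SQLite fallback is used
for key in ("MYSQL_HOST", "MYSQL_USER", "MYSQL_PASSWORD", "MYSQL_DATABASE"):
    os.environ.pop(key, None)
print(Settings().computed_database_url)  # sqlite+aiosqlite:///<abs path>/data/labelstudio_adapter.db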

View File

@@ -0,0 +1,53 @@
import logging
import sys
from pathlib import Path
from app.core.config import settings
def setup_logging():
"""Configure application logging"""
# 创建logs目录
log_dir = Path("logs")
log_dir.mkdir(exist_ok=True)
# 配置日志格式
log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
date_format = "%Y-%m-%d %H:%M:%S"
# 创建处理器
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(getattr(logging, settings.log_level.upper()))
file_handler = logging.FileHandler(
log_dir / "app.log",
encoding="utf-8"
)
file_handler.setLevel(getattr(logging, settings.log_level.upper()))
error_handler = logging.FileHandler(
log_dir / "error.log",
encoding="utf-8"
)
error_handler.setLevel(logging.ERROR)
# 设置格式
formatter = logging.Formatter(log_format, date_format)
console_handler.setFormatter(formatter)
file_handler.setFormatter(formatter)
error_handler.setFormatter(formatter)
# 配置根日志器
root_logger = logging.getLogger()
root_logger.setLevel(getattr(logging, settings.log_level.upper()))
root_logger.addHandler(console_handler)
root_logger.addHandler(file_handler)
root_logger.addHandler(error_handler)
# 配置第三方库日志级别(减少详细日志)
logging.getLogger("uvicorn").setLevel(logging.WARNING)
logging.getLogger("sqlalchemy.engine").setLevel(logging.ERROR) # 隐藏SQL查询日志
logging.getLogger("httpx").setLevel(logging.WARNING)
def get_logger(name: str) -> logging.Logger:
"""Return a logger with the given name"""
return logging.getLogger(name)

View File

@@ -0,0 +1 @@
# app/db/__init__.py

View File

@@ -0,0 +1,39 @@
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker
from sqlalchemy.orm import declarative_base
from app.core.config import settings
from app.core.logging import get_logger
from typing import AsyncGenerator
logger = get_logger(__name__)
# Read the database configuration
db_info = settings.get_database_info()
logger.info(f"Using database: {db_info['type']}")
logger.info(f"Connection URL: {db_info['url']}")
# Create the database engine
engine = create_async_engine(
settings.computed_database_url,
echo=False, # 关闭SQL调试日志以减少输出
future=True,
# SQLite特殊配置
connect_args={"check_same_thread": False} if "sqlite" in settings.computed_database_url else {}
)
# 创建会话工厂
AsyncSessionLocal = async_sessionmaker(
engine,
class_=AsyncSession,
expire_on_commit=False
)
# 创建基础模型类
Base = declarative_base()
async def get_db() -> AsyncGenerator[AsyncSession, None]:
"""Yield a database session"""
async with AsyncSessionLocal() as session:
try:
yield session
finally:
await session.close()

View File

@@ -0,0 +1,31 @@
"""
Custom exception classes
"""
class LabelStudioAdapterException(Exception):
"""Base exception for the Label Studio Adapter"""
pass
class DatasetMappingNotFoundError(LabelStudioAdapterException):
"""Raised when a dataset mapping cannot be found"""
def __init__(self, mapping_id: str):
self.mapping_id = mapping_id
super().__init__(f"Dataset mapping not found: {mapping_id}")
class NoDatasetInfoFoundError(LabelStudioAdapterException):
"""Raised when dataset info cannot be retrieved"""
def __init__(self, dataset_uuid: str):
self.dataset_uuid = dataset_uuid
super().__init__(f"Failed to get dataset info: {dataset_uuid}")
class LabelStudioClientError(LabelStudioAdapterException):
"""Label Studio client error"""
pass
class DMServiceClientError(LabelStudioAdapterException):
"""DM service client error"""
pass
class SyncServiceError(LabelStudioAdapterException):
"""Sync service error"""
pass

View File

@@ -0,0 +1,172 @@
from fastapi import FastAPI, Request, HTTPException, status
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.exceptions import RequestValidationError
from starlette.exceptions import HTTPException as StarletteHTTPException
from contextlib import asynccontextmanager
from typing import Dict, Any
from .core.config import settings
from .core.logging import setup_logging, get_logger
from .clients import DMServiceClient, LabelStudioClient, set_clients
from .api import api_router
from .schemas import StandardResponse
# 设置日志
setup_logging()
logger = get_logger(__name__)
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Application lifespan management"""
# Startup initialization
logger.info("Starting Label Studio Adapter...")
# Initialize the service clients
dm_client = DMServiceClient()
# 初始化 Label Studio 客户端,使用 HTTP REST API + Token 认证
ls_client = LabelStudioClient(
base_url=settings.label_studio_base_url,
token=settings.label_studio_user_token
)
# 设置全局客户端
set_clients(dm_client, ls_client)
# Database initialization is managed by Alembic
# In Docker, entrypoint.sh runs `alembic upgrade head` before startup
# In development, run `alembic upgrade head` manually
logger.info("Database schema managed by Alembic")
logger.info("Label Studio Adapter started")
yield
# 关闭时清理
logger.info("Shutting down Label Studio Adapter...")
# 客户端清理会在客户端管理器中处理
logger.info("Label Studio Adapter stopped")
# 创建FastAPI应用
app = FastAPI(
title=settings.app_name,
description=settings.app_description,
version=settings.app_version,
debug=settings.debug,
lifespan=lifespan
)
# 配置CORS中间件
app.add_middleware(
CORSMiddleware,
allow_origins=settings.allowed_origins,
allow_credentials=True,
allow_methods=settings.allowed_methods,
allow_headers=settings.allowed_headers,
)
# 自定义异常处理器:StarletteHTTPException (包括404等)
@app.exception_handler(StarletteHTTPException)
async def starlette_http_exception_handler(request: Request, exc: StarletteHTTPException):
"""Convert Starlette HTTPExceptions to the standard response format"""
return JSONResponse(
status_code=exc.status_code,
content={
"code": exc.status_code,
"message": "error",
"data": {
"detail": exc.detail
}
}
)
# 自定义异常处理器:FastAPI HTTPException
@app.exception_handler(HTTPException)
async def fastapi_http_exception_handler(request: Request, exc: HTTPException):
"""Convert FastAPI HTTPExceptions to the standard response format"""
return JSONResponse(
status_code=exc.status_code,
content={
"code": exc.status_code,
"message": "error",
"data": {
"detail": exc.detail
}
}
)
# 自定义异常处理器:RequestValidationError
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError):
"""Convert request validation errors to the standard response format"""
return JSONResponse(
status_code=422,
content={
"code": 422,
"message": "error",
"data": {
"detail": "Validation error",
"errors": exc.errors()
}
}
)
# 自定义异常处理器:未捕获的异常
@app.exception_handler(Exception)
async def general_exception_handler(request: Request, exc: Exception):
"""Convert uncaught exceptions to the standard response format"""
logger.error(f"Unhandled exception: {exc}", exc_info=True)
return JSONResponse(
status_code=500,
content={
"code": 500,
"message": "error",
"data": {
"detail": "Internal server error"
}
}
)
# 注册路由
app.include_router(api_router, prefix="/api")
# Test endpoints for verifying exception handling
@app.get("/test-404", include_in_schema=False)
async def test_404():
"""Test 404 exception handling"""
raise HTTPException(status_code=404, detail="Test 404 error")
@app.get("/test-500", include_in_schema=False)
async def test_500():
"""Test 500 exception handling"""
raise Exception("Test uncaught exception")
# Root path: return basic service information
@app.get("/", response_model=StandardResponse[Dict[str, Any]], include_in_schema=False)
async def root():
"""Root path: return service information"""
return StandardResponse(
code=200,
message="success",
data={
"message": f"{settings.app_name} is running",
"version": settings.app_version,
"docs_url": "/docs",
"dm_service_url": settings.dm_service_base_url,
"label_studio_url": settings.label_studio_base_url
}
)
if __name__ == "__main__":
import uvicorn
uvicorn.run(
"app.main:app",
host=settings.host,
port=settings.port,
reload=settings.debug,
log_level=settings.log_level.lower()
)

View File

@@ -0,0 +1,5 @@
# app/models/__init__.py
from .dataset_mapping import DatasetMapping
__all__ = ["DatasetMapping"]

View File

@@ -0,0 +1,25 @@
from sqlalchemy import Column, String, DateTime, Boolean, Text
from sqlalchemy.sql import func
from app.db.database import Base
import uuid
class DatasetMapping(Base):
"""数据集映射模型"""
__tablename__ = "mapping"
mapping_id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
source_dataset_id = Column(String(36), nullable=False, comment="源数据集ID")
labelling_project_id = Column(String(36), nullable=False, comment="标注项目ID")
labelling_project_name = Column(String(255), nullable=True, comment="标注项目名称")
created_at = Column(DateTime(timezone=True), server_default=func.now(), comment="创建时间")
last_updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), comment="最后更新时间")
deleted_at = Column(DateTime(timezone=True), nullable=True, comment="删除时间")
def __repr__(self):
return f"<DatasetMapping(uuid={self.mapping_id}, source={self.source_dataset_id}, labelling={self.labelling_project_id})>"
@property
def is_deleted(self) -> bool:
"""检查是否已被软删除"""
return self.deleted_at is not None

View File

@@ -0,0 +1,29 @@
# app/schemas/__init__.py
from .common import *
from .dataset_mapping import *
from .dm_service import *
from .label_studio import *
__all__ = [
# Common schemas
"StandardResponse",
# Dataset Mapping schemas
"DatasetMappingBase",
"DatasetMappingCreateRequest",
"DatasetMappingUpdateRequest",
"DatasetMappingResponse",
"DatasetMappingCreateResponse",
"SyncDatasetResponse",
"DeleteDatasetResponse",
# DM Service schemas
"DatasetFileResponse",
"PagedDatasetFileResponse",
"DatasetResponse",
# Label Studio schemas
"LabelStudioProject",
"LabelStudioTask"
]

View File

@@ -0,0 +1,27 @@
"""
通用响应模型
"""
from typing import Generic, TypeVar, Optional
from pydantic import BaseModel, Field
# 定义泛型类型变量
T = TypeVar('T')
class StandardResponse(BaseModel, Generic[T]):
"""
标准API响应格式
所有API端点应返回此格式,确保响应的一致性
"""
code: int = Field(..., description="HTTP状态码")
message: str = Field(..., description="响应消息")
data: Optional[T] = Field(None, description="响应数据")
class Config:
json_schema_extra = {
"example": {
"code": 200,
"message": "success",
"data": {}
}
}

View File

@@ -0,0 +1,53 @@
from pydantic import BaseModel, Field
from typing import Optional
from datetime import datetime
class DatasetMappingBase(BaseModel):
"""数据集映射 基础模型"""
source_dataset_id: str = Field(..., description="源数据集ID")
class DatasetMappingCreateRequest(DatasetMappingBase):
"""数据集映射 创建 请求模型"""
pass
class DatasetMappingCreateResponse(BaseModel):
"""数据集映射 创建 响应模型"""
mapping_id: str = Field(..., description="映射UUID")
labelling_project_id: str = Field(..., description="Label Studio项目ID")
labelling_project_name: str = Field(..., description="Label Studio项目名称")
message: str = Field(..., description="响应消息")
class DatasetMappingUpdateRequest(BaseModel):
"""数据集映射 更新 请求模型"""
source_dataset_id: Optional[str] = Field(None, description="源数据集ID")
class DatasetMappingResponse(DatasetMappingBase):
"""数据集映射 查询 响应模型"""
mapping_id: str = Field(..., description="映射UUID")
labelling_project_id: str = Field(..., description="标注项目ID")
labelling_project_name: Optional[str] = Field(None, description="标注项目名称")
created_at: datetime = Field(..., description="创建时间")
last_updated_at: datetime = Field(..., description="最后更新时间")
deleted_at: Optional[datetime] = Field(None, description="删除时间")
class Config:
from_attributes = True
class SyncDatasetRequest(BaseModel):
"""同步数据集请求模型"""
mapping_id: str = Field(..., description="映射ID(mapping UUID)")
batch_size: int = Field(50, ge=1, le=100, description="批处理大小")
class SyncDatasetResponse(BaseModel):
"""同步数据集响应模型"""
mapping_id: str = Field(..., description="映射UUID")
status: str = Field(..., description="同步状态")
synced_files: int = Field(..., description="已同步文件数量")
total_files: int = Field(0, description="总文件数量")
message: str = Field(..., description="响应消息")
class DeleteDatasetResponse(BaseModel):
"""删除数据集响应模型"""
mapping_id: str = Field(..., description="映射UUID")
status: str = Field(..., description="删除状态")
message: str = Field(..., description="响应消息")

View File

@@ -0,0 +1,58 @@
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any
from datetime import datetime
class DatasetFileResponse(BaseModel):
"""DM服务数据集文件响应模型"""
id: str = Field(..., description="文件ID")
fileName: str = Field(..., description="文件名")
fileType: str = Field(..., description="文件类型")
filePath: str = Field(..., description="文件路径")
originalName: Optional[str] = Field(None, description="原始文件名")
size: Optional[int] = Field(None, description="文件大小(字节)")
status: Optional[str] = Field(None, description="文件状态")
uploadedAt: Optional[datetime] = Field(None, description="上传时间")
description: Optional[str] = Field(None, description="文件描述")
uploadedBy: Optional[str] = Field(None, description="上传者")
lastAccessTime: Optional[datetime] = Field(None, description="最后访问时间")
class PagedDatasetFileResponse(BaseModel):
"""DM服务分页文件响应模型"""
content: List[DatasetFileResponse] = Field(..., description="文件列表")
totalElements: int = Field(..., description="总元素数")
totalPages: int = Field(..., description="总页数")
page: int = Field(..., description="当前页码")
size: int = Field(..., description="每页大小")
class DatasetTypeResponse(BaseModel):
"""数据集类型响应模型"""
code: str = Field(..., description="类型编码")
name: str = Field(..., description="类型名称")
description: Optional[str] = Field(None, description="类型描述")
supportedFormats: List[str] = Field(default_factory=list, description="支持的文件格式")
icon: Optional[str] = Field(None, description="图标")
class DatasetResponse(BaseModel):
"""DM服务数据集响应模型"""
id: str = Field(..., description="数据集ID")
name: str = Field(..., description="数据集名称")
description: Optional[str] = Field(None, description="数据集描述")
datasetType: str = Field(..., description="数据集类型", alias="datasetType")
status: str = Field(..., description="数据集状态")
fileCount: int = Field(..., description="文件数量")
totalSize: int = Field(..., description="总大小(字节)")
createdAt: Optional[datetime] = Field(None, description="创建时间")
updatedAt: Optional[datetime] = Field(None, description="更新时间")
createdBy: Optional[str] = Field(None, description="创建者")
# 为了向后兼容,添加一个属性方法返回类型对象
@property
def type(self) -> DatasetTypeResponse:
"""兼容属性:返回类型对象"""
return DatasetTypeResponse(
code=self.datasetType,
name=self.datasetType,
description=None,
supportedFormats=[],
icon=None
)

View File

@@ -0,0 +1,37 @@
from pydantic import BaseModel, Field
from typing import Dict, Any, Optional, List
from datetime import datetime
class LabelStudioProject(BaseModel):
"""Label Studio项目模型"""
id: int = Field(..., description="项目ID")
title: str = Field(..., description="项目标题")
description: Optional[str] = Field(None, description="项目描述")
label_config: str = Field(..., description="标注配置")
created_at: Optional[datetime] = Field(None, description="创建时间")
updated_at: Optional[datetime] = Field(None, description="更新时间")
class LabelStudioTaskData(BaseModel):
"""Label Studio任务数据模型"""
image: Optional[str] = Field(None, description="图像URL")
text: Optional[str] = Field(None, description="文本内容")
audio: Optional[str] = Field(None, description="音频URL")
video: Optional[str] = Field(None, description="视频URL")
filename: Optional[str] = Field(None, description="文件名")
class LabelStudioTask(BaseModel):
"""Label Studio任务模型"""
data: LabelStudioTaskData = Field(..., description="任务数据")
project: Optional[int] = Field(None, description="项目ID")
meta: Optional[Dict[str, Any]] = Field(None, description="元数据")
class LabelStudioCreateProjectRequest(BaseModel):
"""创建Label Studio项目请求模型"""
title: str = Field(..., description="项目标题")
description: str = Field("", description="项目描述")
label_config: str = Field(..., description="标注配置")
class LabelStudioCreateTaskRequest(BaseModel):
"""创建Label Studio任务请求模型"""
data: Dict[str, Any] = Field(..., description="任务数据")
project: Optional[int] = Field(None, description="项目ID")

View File

@@ -0,0 +1,6 @@
# app/services/__init__.py
from .dataset_mapping_service import DatasetMappingService
from .sync_service import SyncService
__all__ = ["DatasetMappingService", "SyncService"]

View File

@@ -0,0 +1,223 @@
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from sqlalchemy import update
from typing import Optional, List
from datetime import datetime
import uuid
from app.models.dataset_mapping import DatasetMapping
from app.schemas.dataset_mapping import (
DatasetMappingCreateRequest,
DatasetMappingUpdateRequest,
DatasetMappingResponse
)
from app.core.logging import get_logger
logger = get_logger(__name__)
class DatasetMappingService:
"""Dataset mapping service"""
def __init__(self, db: AsyncSession):
self.db = db
async def create_mapping(
self,
mapping_data: DatasetMappingCreateRequest,
labelling_project_id: str,
labelling_project_name: str
) -> DatasetMappingResponse:
"""创建数据集映射"""
logger.info(f"Create dataset mapping: {mapping_data.source_dataset_id} -> {labelling_project_id}")
db_mapping = DatasetMapping(
mapping_id=str(uuid.uuid4()),
source_dataset_id=mapping_data.source_dataset_id,
labelling_project_id=labelling_project_id,
labelling_project_name=labelling_project_name
)
self.db.add(db_mapping)
await self.db.commit()
await self.db.refresh(db_mapping)
logger.info(f"Mapping created: {db_mapping.mapping_id}")
return DatasetMappingResponse.model_validate(db_mapping)
async def get_mapping_by_source_uuid(
self,
source_dataset_id: str
) -> Optional[DatasetMappingResponse]:
"""Get a mapping by source dataset ID (returns the first non-deleted one)"""
logger.debug(f"Get mapping by source dataset id: {source_dataset_id}")
result = await self.db.execute(
select(DatasetMapping).where(
DatasetMapping.source_dataset_id == source_dataset_id,
DatasetMapping.deleted_at.is_(None)
)
)
# scalars().first() tolerates multiple mappings per dataset; scalar_one_or_none() would raise
mapping = result.scalars().first()
if mapping:
logger.debug(f"Found mapping: {mapping.mapping_id}")
return DatasetMappingResponse.model_validate(mapping)
logger.debug(f"No mapping found for source dataset id: {source_dataset_id}")
return None
async def get_mappings_by_source_dataset_id(
self,
source_dataset_id: str,
include_deleted: bool = False
) -> List[DatasetMappingResponse]:
"""根据源数据集ID获取所有映射关系"""
logger.debug(f"Get all mappings by source dataset id: {source_dataset_id}")
query = select(DatasetMapping).where(
DatasetMapping.source_dataset_id == source_dataset_id
)
if not include_deleted:
query = query.where(DatasetMapping.deleted_at.is_(None))
result = await self.db.execute(
query.order_by(DatasetMapping.created_at.desc())
)
mappings = result.scalars().all()
logger.debug(f"Found {len(mappings)} mappings")
return [DatasetMappingResponse.model_validate(mapping) for mapping in mappings]
async def get_mapping_by_labelling_project_id(
self,
labelling_project_id: str
) -> Optional[DatasetMappingResponse]:
"""根据Label Studio项目ID获取映射"""
logger.debug(f"Get mapping by Label Studio project id: {labelling_project_id}")
result = await self.db.execute(
select(DatasetMapping).where(
DatasetMapping.labelling_project_id == labelling_project_id,
DatasetMapping.deleted_at.is_(None)
)
)
mapping = result.scalar_one_or_none()
if mapping:
logger.debug(f"Found mapping: {mapping.mapping_id}")
return DatasetMappingResponse.model_validate(mapping)
logger.debug(f"No mapping found for Label Studio project id: {labelling_project_id}")
return None
async def get_mapping_by_uuid(self, mapping_id: str) -> Optional[DatasetMappingResponse]:
"""根据映射UUID获取映射"""
logger.debug(f"Get mapping: {mapping_id}")
result = await self.db.execute(
select(DatasetMapping).where(
DatasetMapping.mapping_id == mapping_id,
DatasetMapping.deleted_at.is_(None)
)
)
mapping = result.scalar_one_or_none()
if mapping:
logger.debug(f"Found mapping: {mapping.mapping_id}")
return DatasetMappingResponse.model_validate(mapping)
logger.debug(f"Mapping not found: {mapping_id}")
return None
async def update_mapping(
self,
mapping_id: str,
update_data: DatasetMappingUpdateRequest
) -> Optional[DatasetMappingResponse]:
"""更新映射信息"""
logger.info(f"Update mapping: {mapping_id}")
mapping = await self.get_mapping_by_uuid(mapping_id)
if not mapping:
return None
update_values = update_data.model_dump(exclude_unset=True)
update_values["last_updated_at"] = datetime.utcnow()
result = await self.db.execute(
update(DatasetMapping)
.where(DatasetMapping.mapping_id == mapping_id)
.values(**update_values)
)
await self.db.commit()
if result.rowcount > 0:
return await self.get_mapping_by_uuid(mapping_id)
return None
async def update_last_updated_at(self, mapping_id: str) -> bool:
"""更新最后更新时间"""
logger.debug(f"Update mapping last updated at: {mapping_id}")
result = await self.db.execute(
update(DatasetMapping)
.where(
DatasetMapping.mapping_id == mapping_id,
DatasetMapping.deleted_at.is_(None)
)
.values(last_updated_at=datetime.utcnow())
)
await self.db.commit()
return result.rowcount > 0
async def soft_delete_mapping(self, mapping_id: str) -> bool:
"""软删除映射"""
logger.info(f"Soft delete mapping: {mapping_id}")
result = await self.db.execute(
update(DatasetMapping)
.where(
DatasetMapping.mapping_id == mapping_id,
DatasetMapping.deleted_at.is_(None)
)
.values(deleted_at=datetime.utcnow())
)
await self.db.commit()
success = result.rowcount > 0
if success:
logger.info(f"Mapping soft-deleted: {mapping_id}")
else:
logger.warning(f"Mapping not exists or already deleted: {mapping_id}")
return success
async def get_all_mappings(
self,
skip: int = 0,
limit: int = 100
) -> List[DatasetMappingResponse]:
"""获取所有有效映射"""
logger.debug(f"List all mappings, skip: {skip}, limit: {limit}")
result = await self.db.execute(
select(DatasetMapping)
.where(DatasetMapping.deleted_at.is_(None))
.offset(skip)
.limit(limit)
.order_by(DatasetMapping.created_at.desc())
)
mappings = result.scalars().all()
logger.debug(f"Found {len(mappings)} mappings")
return [DatasetMappingResponse.model_validate(mapping) for mapping in mappings]
async def count_mappings(self) -> int:
"""统计映射总数"""
result = await self.db.execute(
select(DatasetMapping)
.where(DatasetMapping.deleted_at.is_(None))
)
mappings = result.scalars().all()
return len(mappings)
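Outside the FastAPI request path the same service can be driven directly with the session factory from app.db.database; a rough sketch:

import asyncio
from app.db.database import AsyncSessionLocal
from app.services.dataset_mapping_service import DatasetMappingService

async def list_active_mappings() -> None:
    async with AsyncSessionLocal() as session:
        service = DatasetMappingService(session)
        for m in await service.get_all_mappings(limit=20):
            print(m.mapping_id, "->", m.labelling_project_id)

asyncio.run(list_active_mappings())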

View File

@@ -0,0 +1,276 @@
from typing import Optional, List, Dict, Any, Tuple
from app.clients.dm_client import DMServiceClient
from app.clients.label_studio_client import LabelStudioClient
from app.services.dataset_mapping_service import DatasetMappingService
from app.schemas.dataset_mapping import SyncDatasetResponse
from app.core.logging import get_logger
from app.core.config import settings
from app.exceptions import NoDatasetInfoFoundError, DatasetMappingNotFoundError
logger = get_logger(__name__)
class SyncService:
"""Dataset synchronization service"""
def __init__(
self,
dm_client: DMServiceClient,
ls_client: LabelStudioClient,
mapping_service: DatasetMappingService
):
self.dm_client = dm_client
self.ls_client = ls_client
self.mapping_service = mapping_service
def determine_data_type(self, file_type: str) -> str:
"""Determine the data type from a file type"""
file_type_lower = file_type.lower()
if any(ext in file_type_lower for ext in ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'svg', 'webp']):
return 'image'
elif any(ext in file_type_lower for ext in ['mp3', 'wav', 'flac', 'aac', 'ogg']):
return 'audio'
elif any(ext in file_type_lower for ext in ['mp4', 'avi', 'mov', 'wmv', 'flv', 'webm']):
return 'video'
elif any(ext in file_type_lower for ext in ['txt', 'doc', 'docx', 'pdf']):
return 'text'
else:
return 'image' # 默认为图像类型
async def get_existing_dm_file_mapping(self, project_id: str) -> Dict[str, int]:
"""
Get the mapping from DM file IDs to task IDs for tasks that already exist in the Label Studio project.
Args:
project_id: Label Studio project ID
Returns:
Dict mapping dm_file_id to task_id
"""
try:
logger.info(f"Fetching existing task mappings for project {project_id} (page_size={settings.ls_task_page_size})")
dm_file_to_task_mapping = {}
# 使用Label Studio客户端封装的方法获取所有任务
page_size = getattr(settings, 'ls_task_page_size', 1000)
# 调用封装好的方法获取所有任务,page=None表示获取全部
result = await self.ls_client.get_project_tasks(
project_id=project_id,
page=None, # 不指定page,获取所有任务
page_size=page_size
)
if not result:
logger.warning(f"Failed to fetch tasks for project {project_id}")
return {}
all_tasks = result.get("tasks", [])
# 遍历所有任务,构建映射
for task in all_tasks:
# 检查任务的meta字段中是否有dm_file_id
meta = task.get('meta')
if meta:
dm_file_id = meta.get('dm_file_id')
if dm_file_id:
task_id = task.get('id')
if task_id:
dm_file_to_task_mapping[str(dm_file_id)] = task_id
logger.info(f"Found {len(dm_file_to_task_mapping)} existing task mappings")
return dm_file_to_task_mapping
except Exception as e:
logger.error(f"Error while fetching existing tasks: {e}")
return {} # 发生错误时返回空字典,会同步所有文件
async def sync_dataset_files(
self,
mapping_id: str,
batch_size: int = 50
) -> SyncDatasetResponse:
"""Sync dataset files into Label Studio"""
logger.info(f"Start syncing dataset by mapping: {mapping_id}")
# 获取映射关系
mapping = await self.mapping_service.get_mapping_by_uuid(mapping_id)
if not mapping:
logger.error(f"Dataset mapping not found: {mapping_id}")
return SyncDatasetResponse(
mapping_id="",
status="error",
synced_files=0,
total_files=0,
message=f"Dataset mapping not found: {mapping_id}"
)
try:
# 获取数据集信息
dataset_info = await self.dm_client.get_dataset(mapping.source_dataset_id)
if not dataset_info:
raise NoDatasetInfoFoundError(mapping.source_dataset_id)
synced_files = 0
deleted_tasks = 0
total_files = dataset_info.fileCount
page = 0
logger.info(f"Total files in dataset: {total_files}")
# 获取Label Studio中已存在的DM文件ID到任务ID的映射
existing_dm_file_mapping = await self.get_existing_dm_file_mapping(mapping.labelling_project_id)
existing_dm_file_ids = set(existing_dm_file_mapping.keys())
logger.info(f"{len(existing_dm_file_ids)} tasks already exist in Label Studio")
# 收集DM中当前存在的所有文件ID
current_dm_file_ids = set()
# 分页获取并同步文件
while True:
files_response = await self.dm_client.get_dataset_files(
mapping.source_dataset_id,
page=page,
size=batch_size,
status="COMPLETED" # 只同步已完成的文件
)
if not files_response or not files_response.content:
logger.info(f"No more files on page {page + 1}")
break
logger.info(f"Processing page {page + 1}, total {len(files_response.content)} files")
# 筛选出新文件并批量创建任务
tasks = []
new_files_count = 0
existing_files_count = 0
for file_info in files_response.content:
# 记录当前DM中存在的文件ID
current_dm_file_ids.add(str(file_info.id))
# 检查文件是否已存在
if str(file_info.id) in existing_dm_file_ids:
existing_files_count += 1
logger.debug(f"Skip existing file: {file_info.originalName} (ID: {file_info.id})")
continue
new_files_count += 1
# Determine the data type
data_type = self.determine_data_type(file_info.fileType)
# Swap the file path prefix: only the leading prefix is replaced, so an identical substring later in the path is left untouched
file_path = file_info.filePath.removeprefix(settings.dm_file_path_prefix)
file_path = settings.label_studio_file_path_prefix + file_path
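# For example (hypothetical prefix values): with dm_file_path_prefix="/dataset" and
# label_studio_file_path_prefix="/data/local-files/?d=", a DM path of
# "/dataset/images/cat.jpg" becomes "/data/local-files/?d=/images/cat.jpg".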
# Build the task payload
task_data = {
"data": {
data_type: file_path
},
"meta": {
"file_size": file_info.size,
"file_type": file_info.fileType,
"dm_dataset_id": mapping.source_dataset_id,
"dm_file_id": file_info.id,
"original_name": file_info.originalName,
}
}
tasks.append(task_data)
logger.info(f"Page {page + 1}: new files {new_files_count}, existing files {existing_files_count}")
# Create Label Studio tasks in batch
if tasks:
batch_result = await self.ls_client.create_tasks_batch(
mapping.labelling_project_id,
tasks
)
if batch_result:
synced_files += len(tasks)
logger.info(f"Successfully synced {len(tasks)} files")
else:
logger.warning(f"Batch task creation failed, fallback to single creation")
# 如果批量创建失败,尝试单个创建
for task_data in tasks:
task_result = await self.ls_client.create_task(
mapping.labelling_project_id,
task_data["data"],
task_data.get("meta")
)
if task_result:
synced_files += 1
# Check whether more pages remain
if page >= files_response.totalPages - 1:
break
page += 1
# Remove tasks that still exist in Label Studio but no longer exist in DM
tasks_to_delete = []
for dm_file_id, task_id in existing_dm_file_mapping.items():
if dm_file_id not in current_dm_file_ids:
tasks_to_delete.append(task_id)
logger.debug(f"Mark task for deletion: {task_id} (DM file ID: {dm_file_id})")
if tasks_to_delete:
logger.info(f"Deleting {len(tasks_to_delete)} tasks not present in DM")
delete_result = await self.ls_client.delete_tasks_batch(tasks_to_delete)
deleted_tasks = delete_result.get("successful", 0)
logger.info(f"Successfully deleted {deleted_tasks} tasks")
else:
logger.info("No tasks to delete")
# Update the mapping's last-updated timestamp
await self.mapping_service.update_last_updated_at(mapping.mapping_id)
logger.info(f"Sync completed: total_files={total_files}, created={synced_files}, deleted={deleted_tasks}")
return SyncDatasetResponse(
mapping_id=mapping.mapping_id,
status="success",
synced_files=synced_files,
total_files=total_files,
message=f"Sync completed: created {synced_files} files, deleted {deleted_tasks} tasks"
)
except Exception as e:
logger.error(f"Error while syncing dataset: {e}")
return SyncDatasetResponse(
mapping_id=mapping.mapping_id,
status="error",
synced_files=0,
total_files=0,
message=f"Sync failed: {str(e)}"
)
async def get_sync_status(
self,
source_dataset_id: str
) -> Optional[Dict[str, Any]]:
"""获取同步状态"""
mapping = await self.mapping_service.get_mapping_by_source_uuid(source_dataset_id)
if not mapping:
return None
# Fetch the DM dataset info
dataset_info = await self.dm_client.get_dataset(source_dataset_id)
# Fetch the Label Studio project's task count
tasks_info = await self.ls_client.get_project_tasks(mapping.labelling_project_id)
return {
"mapping_id": mapping.mapping_id,
"source_dataset_id": source_dataset_id,
"labelling_project_id": mapping.labelling_project_id,
"last_updated_at": mapping.last_updated_at,
"dm_total_files": dataset_info.fileCount if dataset_info else 0,
"ls_total_tasks": tasks_info.get("count", 0) if tasks_info else 0,
"sync_ratio": (
tasks_info.get("count", 0) / dataset_info.fileCount
if dataset_info and dataset_info.fileCount > 0 and tasks_info else 0
)
}
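A minimal sketch of driving a sync from an async entry point, for reference only: the service instance is assumed to be constructed elsewhere with its DM client, Label Studio client, and mapping service, and the mapping UUID below is a placeholder.
import asyncio

async def run_sync(service, mapping_uuid: str) -> None:
    # 'service' is an already-wired instance of the sync service above;
    # 'mapping_uuid' is a placeholder for an existing dataset mapping.
    response = await service.sync_dataset_files(mapping_uuid, batch_size=50)
    print(f"{response.status}: {response.synced_files}/{response.total_files} files")
    print(response.message)

# asyncio.run(run_sync(service, "00000000-0000-0000-0000-000000000000"))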

View File

@@ -0,0 +1,64 @@
#!/bin/bash
set -e
echo "=========================================="
echo "Label Studio Adapter Starting..."
echo "=========================================="
# Base path for Label Studio local storage (from an env var, with a default)
LABEL_STUDIO_LOCAL_BASE="${LABEL_STUDIO_LOCAL_BASE:-/label-studio/local_files}"
echo "=========================================="
echo "Ensuring Label Studio local storage directories exist..."
echo "Base path: ${LABEL_STUDIO_LOCAL_BASE}"
echo "=========================================="
# Create the required directories
mkdir -p "${LABEL_STUDIO_LOCAL_BASE}/dataset"
mkdir -p "${LABEL_STUDIO_LOCAL_BASE}/upload"
echo "✓ Directory 'dataset' ready: ${LABEL_STUDIO_LOCAL_BASE}/dataset"
echo "✓ Directory 'upload' ready: ${LABEL_STUDIO_LOCAL_BASE}/upload"
echo "=========================================="
echo "Directory initialization completed"
echo "=========================================="
# Wait for the database to be ready (if a database is configured)
if [ -n "$MYSQL_HOST" ] || [ -n "$POSTGRES_HOST" ]; then
echo "Waiting for database to be ready..."
sleep 5
fi
# Run database migrations
echo "=========================================="
echo "Running database migrations..."
echo "=========================================="
if alembic upgrade head; then
echo "✓ Database migrations completed successfully"
else
echo "⚠️ WARNING: Database migrations failed"
echo " The application may not work correctly"
fi
echo "=========================================="
# Start the application
echo "Starting Label Studio Adapter..."
echo "Host: ${HOST:-0.0.0.0}"
echo "Port: ${PORT:-18000}"
echo "Debug: ${DEBUG:-false}"
echo "Label Studio URL: ${LABEL_STUDIO_BASE_URL}"
echo "=========================================="
# Lowercase LOG_LEVEL (uvicorn expects lowercase log levels)
LOG_LEVEL_LOWER=$(echo "${LOG_LEVEL:-info}" | tr '[:upper:]' '[:lower:]')
# Launch the application with uvicorn
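# ${DEBUG:+--reload} expands to --reload only when DEBUG is set to a non-empty value,
# so auto-reload stays off by default.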
exec uvicorn app.main:app \
--host "${HOST:-0.0.0.0}" \
--port "${PORT:-18000}" \
--log-level "${LOG_LEVEL_LOWER}" \
${DEBUG:+--reload}

Binary file not shown.

View File

@@ -19,4 +19,3 @@ xmltodict==1.0.2
zhconv==1.4.3 zhconv==1.4.3
sqlalchemy==2.0.40 sqlalchemy==2.0.40
pymysql==1.1.1 pymysql==1.1.1
unstructured[pdf,docx]==0.18.15

View File

@@ -15,7 +15,7 @@ from datamate.scheduler import func_scheduler
from datamate.wrappers import WRAPPERS from datamate.wrappers import WRAPPERS
# 日志配置 # 日志配置
LOG_DIR = "/var/log/data-mate/runtime" LOG_DIR = "/var/log/datamate/runtime"
os.makedirs(LOG_DIR, exist_ok=True) os.makedirs(LOG_DIR, exist_ok=True)
logger.add( logger.add(
f"{LOG_DIR}/runtime.log", f"{LOG_DIR}/runtime.log",

View File

@@ -17,7 +17,7 @@ classifiers = [
# Core dependencies # Core dependencies
dependencies = [ dependencies = [
"uvicorn", "uvicorn[standard]",
"fastapi", "fastapi",
"loguru", "loguru",
"jsonargparse", "jsonargparse",

View File

@@ -22,6 +22,7 @@ CREATE TABLE IF NOT EXISTS t_clean_task
dest_dataset_name varchar(64), dest_dataset_name varchar(64),
before_size bigint, before_size bigint,
after_size bigint, after_size bigint,
file_count int,
created_at timestamp default current_timestamp, created_at timestamp default current_timestamp,
started_at timestamp, started_at timestamp,
finished_at timestamp, finished_at timestamp,

View File

@@ -111,113 +111,30 @@ VALUES ('TextFormatter', 'TXT文本抽取', '抽取TXT中的文本。', '1.0.0',
INSERT IGNORE INTO t_operator_category_relation(category_id, operator_id) INSERT IGNORE INTO t_operator_category_relation(category_id, operator_id)
VALUES (3, 'TextFormatter'), SELECT c.id, o.id
(7, 'FileExporter'), FROM t_operator_category c
(8, 'TextFormatter'), CROSS JOIN t_operator o
(8, 'FileExporter'), WHERE c.id IN (3, 8, 11)
(3, 'FileWithShortOrLongLengthFilter'), AND o.id IN ('TextFormatter', 'FileWithShortOrLongLengthFilter', 'FileWithHighRepeatPhraseRateFilter',
(3, 'FileWithHighRepeatPhraseRateFilter'), 'FileWithHighRepeatWordRateFilter', 'FileWithHighSpecialCharRateFilter', 'FileWithManySensitiveWordsFilter',
(3, 'FileWithHighRepeatWordRateFilter'), 'DuplicateFilesFilter', 'DuplicateSentencesFilter', 'AnonymizedCreditCardNumber', 'AnonymizedIdNumber',
(3, 'FileWithHighSpecialCharRateFilter'), 'AnonymizedIpAddress', 'AnonymizedPhoneNumber', 'AnonymizedUrlCleaner', 'HtmlTagCleaner', 'XMLTagCleaner',
(3, 'FileWithManySensitiveWordsFilter'), 'ContentCleaner', 'EmailNumberCleaner', 'EmojiCleaner', 'ExtraSpaceCleaner', 'FullWidthCharacterCleaner',
(3, 'DuplicateFilesFilter'), 'GrableCharactersCleaner', 'InvisibleCharactersCleaner', 'LegendCleaner', 'PoliticalWordCleaner',
(3, 'DuplicateSentencesFilter'), 'SexualAndViolentWordCleaner', 'TraditionalChineseCleaner', 'UnicodeSpaceCleaner');
(3, 'AnonymizedCreditCardNumber'),
(3, 'AnonymizedIdNumber'), INSERT IGNORE INTO t_operator_category_relation(category_id, operator_id)
(3, 'AnonymizedIpAddress'), SELECT c.id, o.id
(3, 'AnonymizedPhoneNumber'), FROM t_operator_category c
(3, 'AnonymizedUrlCleaner'), CROSS JOIN t_operator o
(3, 'HtmlTagCleaner'), WHERE c.id IN (4, 8, 11)
(3, 'XMLTagCleaner'), AND o.id IN ('ImgFormatter', 'ImgBlurredImagesCleaner', 'ImgBrightness', 'ImgContrast', 'ImgDenoise',
(3, 'ContentCleaner'), 'ImgDuplicatedImagesCleaner', 'ImgPerspectiveTransformation', 'ImgResize', 'ImgSaturation',
(3, 'EmailNumberCleaner'), 'ImgShadowRemove', 'ImgSharpness', 'ImgSimilarImagesCleaner', 'ImgTypeUnify');
(3, 'EmojiCleaner'),
(3, 'ExtraSpaceCleaner'), INSERT IGNORE INTO t_operator_category_relation(category_id, operator_id)
(3, 'FullWidthCharacterCleaner'), SELECT c.id, o.id
(3, 'GrableCharactersCleaner'), FROM t_operator_category c
(3, 'InvisibleCharactersCleaner'), CROSS JOIN t_operator o
(3, 'LegendCleaner'), WHERE c.id IN (7, 8, 11)
(3, 'PoliticalWordCleaner'), AND o.id IN ('FileExporter');
(3, 'SexualAndViolentWordCleaner'),
(3, 'TraditionalChineseCleaner'),
(3, 'UnicodeSpaceCleaner'),
(4, 'ImgFormatter'),
(4, 'ImgBlurredImagesCleaner'),
(4, 'ImgBrightness'),
(4, 'ImgContrast'),
(4, 'ImgDenoise'),
(4, 'ImgDuplicatedImagesCleaner'),
(4, 'ImgPerspectiveTransformation'),
(4, 'ImgResize'),
(4, 'ImgSaturation'),
(4, 'ImgShadowRemove'),
(4, 'ImgSharpness'),
(4, 'ImgSimilarImagesCleaner'),
(4, 'ImgTypeUnify'),
(8, 'FileWithShortOrLongLengthFilter'),
(8, 'FileWithHighRepeatPhraseRateFilter'),
(8, 'FileWithHighRepeatWordRateFilter'),
(8, 'FileWithHighSpecialCharRateFilter'),
(8, 'FileWithManySensitiveWordsFilter'),
(8, 'DuplicateFilesFilter'),
(8, 'DuplicateSentencesFilter'),
(8, 'AnonymizedCreditCardNumber'),
(8, 'AnonymizedIdNumber'),
(8, 'AnonymizedIpAddress'),
(8, 'AnonymizedPhoneNumber'),
(8, 'AnonymizedUrlCleaner'),
(8, 'HtmlTagCleaner'),
(8, 'XMLTagCleaner'),
(8, 'ContentCleaner'),
(8, 'EmailNumberCleaner'),
(8, 'EmojiCleaner'),
(8, 'ExtraSpaceCleaner'),
(8, 'FullWidthCharacterCleaner'),
(8, 'GrableCharactersCleaner'),
(8, 'InvisibleCharactersCleaner'),
(8, 'LegendCleaner'),
(8, 'PoliticalWordCleaner'),
(8, 'SexualAndViolentWordCleaner'),
(8, 'TraditionalChineseCleaner'),
(8, 'UnicodeSpaceCleaner'),
(11, 'TextFormatter'),
(11, 'FileExporter'),
(11, 'FileWithShortOrLongLengthFilter'),
(11, 'FileWithHighRepeatPhraseRateFilter'),
(11, 'FileWithHighRepeatWordRateFilter'),
(11, 'FileWithHighSpecialCharRateFilter'),
(11, 'FileWithManySensitiveWordsFilter'),
(11, 'DuplicateFilesFilter'),
(11, 'DuplicateSentencesFilter'),
(11, 'AnonymizedCreditCardNumber'),
(11, 'AnonymizedIdNumber'),
(11, 'AnonymizedIpAddress'),
(11, 'AnonymizedPhoneNumber'),
(11, 'AnonymizedUrlCleaner'),
(11, 'HtmlTagCleaner'),
(11, 'XMLTagCleaner'),
(11, 'ContentCleaner'),
(11, 'EmailNumberCleaner'),
(11, 'EmojiCleaner'),
(11, 'ExtraSpaceCleaner'),
(11, 'FullWidthCharacterCleaner'),
(11, 'GrableCharactersCleaner'),
(11, 'InvisibleCharactersCleaner'),
(11, 'LegendCleaner'),
(11, 'PoliticalWordCleaner'),
(11, 'SexualAndViolentWordCleaner'),
(11, 'TraditionalChineseCleaner'),
(11, 'UnicodeSpaceCleaner'),
(11, 'ImgFormatter'),
(11, 'ImgBlurredImagesCleaner'),
(11, 'ImgBrightness'),
(11, 'ImgContrast'),
(11, 'ImgDenoise'),
(11, 'ImgDuplicatedImagesCleaner'),
(11, 'ImgPerspectiveTransformation'),
(11, 'ImgResize'),
(11, 'ImgSaturation'),
(11, 'ImgShadowRemove'),
(11, 'ImgSharpness'),
(11, 'ImgSimilarImagesCleaner'),
(11, 'ImgTypeUnify');

View File

@@ -1,33 +0,0 @@
FROM maven:3-openjdk-8-slim AS builder
RUN sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list && \
sed -i 's/security.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list && \
apt-get update && \
apt-get install -y git && \
git clone https://github.com/alibaba/DataX.git
COPY runtime/datax/ DataX/
RUN cd DataX && \
sed -i "s/com.mysql.jdbc.Driver/com.mysql.cj.jdbc.Driver/g" \
plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataBaseType.java && \
mvn -U clean package assembly:assembly -Dmaven.test.skip=true
FROM openjdk:8-jdk-slim
RUN sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list && \
sed -i 's/security.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list && \
apt-get update && \
apt-get install -y python3 python3-pip python-is-python3 vim wget curl nfs-common rsync && \
apt-get clean && \
rm -rf /var/lib/apy/lists/*
RUN pip config --user set global.index-url https://mirrors.aliyun.com/pypi/simple && \
pip config --user set global.trusted-host mirrors.aliyun.com && \
pip install fastapi uvicorn[standard] && \
pip cache purge
COPY --from=builder /DataX/target/datax/datax /opt/datax
COPY scripts/images/datax/app.py /opt/datax/bin/app.py

View File

@@ -1,52 +0,0 @@
import subprocess
import tempfile
from fastapi import FastAPI
from pydantic import BaseModel
app = FastAPI(title="datax")
class CreateJobParam(BaseModel):
content: str
@app.post("/process", tags=["run datax.py"])
async def process(job: CreateJobParam):
output = {
"status": "failed",
}
try:
# Write the job definition to a temporary JSON file
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=True) as f:
f.write(job.content)
f.seek(0)
cmd_args = ["python3", "/opt/datax/bin/datax.py", f.name]
result = subprocess.run(
cmd_args,
capture_output=True,
text=True,
check=True
)
output["status"] = result.returncode
if result.returncode != 0:
output["stdout"] = result.stdout
output["stderr"] = result.stderr
except subprocess.TimeoutExpired as e:
output["status"] = 408
output["stderr"] = f"The script execution timed out: {e.stderr}"
except subprocess.CalledProcessError as e:
output["status"] = 500
output["stderr"] = f"Script execution failed: {e.stdout}"
except Exception as e:
output["status"] = 500
output["stderr"] = f"Server error: {str(e)}"
return output
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000)

View File

@@ -10,7 +10,7 @@ RUN if [ -f package-lock.json ]; then npm ci; else npm install; fi && \
FROM nginx:1.29 AS runner FROM nginx:1.29 AS runner
COPY --from=builder /app/dist /opt/frontend COPY --from=builder /app/dist /opt/frontend
COPY scripts/images/frontend/edm.conf /etc/nginx/conf.d/default.conf COPY scripts/images/frontend/backend.conf /etc/nginx/conf.d/default.conf
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime

View File

@@ -2,8 +2,13 @@ server {
listen 80; listen 80;
server_name 0.0.0.0; server_name 0.0.0.0;
access_log /var/log/datamate/frontend/access.log main;
error_log /var/log/datamate/frontend/error.log notice;
client_max_body_size 1024M;
location /api/ { location /api/ {
proxy_pass http://backend:8080/api/; proxy_pass http://datamate-backend:8080/api/;
proxy_set_header Host $host; proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;

View File

@@ -0,0 +1,27 @@
FROM python:3.11-slim
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
gcc \
&& rm -rf /var/lib/apt/lists/*
# Copy the requirements file
COPY runtime/label-studio-adapter/requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Copy the application code
COPY runtime/label-studio-adapter .
# Copy the entrypoint script and make it executable
COPY runtime/label-studio-adapter/deploy/docker-entrypoint.sh /docker-entrypoint.sh
RUN chmod +x /docker-entrypoint.sh
# Expose the service port
EXPOSE 8088
# Start via the entrypoint script
ENTRYPOINT ["/docker-entrypoint.sh"]

View File

@@ -1,22 +0,0 @@
FROM python:3.10-slim
RUN apt-get update && \
apt-get install -y curl vim libgl1 libgl1-mesa-glx libglib2.0-0 procps && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
RUN pip config --user set global.index-url https://mirrors.aliyun.com/pypi/simple && \
pip config --user set global.trusted-host mirrors.aliyun.com && \
pip install --upgrade setuptools && \
pip install -U 'mineru[core]==2.1.0' --break-system-packages && \
pip install torch==2.7.1+cpu -f https://download.pytorch.org/whl/torch/ && \
pip install torchvision==0.22.1+cpu -f https://download.pytorch.org/whl/torchvision && \
pip install requests==2.27.1 torch_npu==2.7.1rc1 numpy==1.26.0 decorator==5.2.1 einops==0.8.1 attrs==25.3.0 && \
pip cache purge
ENV CURL_CA_BUNDLE=""
ENV TORCH_DEVICE_BACKEND_AUTOLOAD=0
RUN /bin/bash -c "mineru-models-download -s modelscope -m all"
ENV MINERU_MODEL_SOURCE=local

View File

@@ -17,8 +17,6 @@ ENV HF_HUB_DISABLE_XET=1
RUN pip install -e . -i https://mirrors.huaweicloud.com/repository/pypi/simple \ RUN pip install -e . -i https://mirrors.huaweicloud.com/repository/pypi/simple \
&& pip install -r /opt/runtime/datamate/ops/requirements.txt -i https://mirrors.huaweicloud.com/repository/pypi/simple \ && pip install -r /opt/runtime/datamate/ops/requirements.txt -i https://mirrors.huaweicloud.com/repository/pypi/simple \
&& pip cache purge \ && pip cache purge
&& python -c "from unstructured.nlp.tokenize import download_nltk_packages; download_nltk_packages()" \
&& python -c "from unstructured_inference.models.base import get_model; get_model()"
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime

View File

@@ -1,9 +0,0 @@
FROM downloads.unstructured.io/unstructured-io/unstructured
RUN pip config --user set global.index https://mirrors.huaweicloud.com/repository/pypi && \
pip config --user set global.index-url https://mirrors.huaweicloud.com/repository/pypi/simple && \
pip config --user set global.trusted-host mirrors.huaweicloud.com && \
pip install fastapi uvicorn && \
pip cache purge
COPY scripts/images/unstructured/app.py /app/app.py

View File

@@ -1,61 +0,0 @@
import asyncio
import os
from typing import Optional
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from unstructured.partition.auto import partition
app = FastAPI(title="unstructured")
class FileProcessingRequest(BaseModel):
"""文件处理请求模型"""
file_path: Optional[str] = None
# 可添加其他可选字段
@app.post("/process", tags=["文件处理"])
async def process_file(request_data: FileProcessingRequest):
"""处理文件并返回提取的文本内容"""
try:
file_path = request_data.file_path
if not file_path:
raise HTTPException(status_code=400, detail="缺少必要参数: filePath")
if not os.path.exists(file_path):
raise HTTPException(status_code=404, detail=f"文件不存在: {file_path}")
# Run the potentially slow file processing asynchronously
text_content = await process_file_async(file_path)
# Return the result
return {
"filePath": file_path,
"text": text_content,
"status": "success"
}
except HTTPException as e:
raise e
except Exception as e:
raise HTTPException(status_code=500, detail=f"process failed: {str(e)}")
async def process_file_async(file_path: str) -> str:
"""异步处理文件内容"""
loop = asyncio.get_event_loop()
return await loop.run_in_executor(None, partition_file_sync, file_path)
def partition_file_sync(file_path: str) -> str:
"""同步处理文件内容(由异步函数调用)"""
elements = partition(filename=file_path)
return "\n\n".join([str(el) for el in elements])
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000)

View File

@@ -1,103 +0,0 @@
#!/bin/bash
# ==========================================================
# Step 1: Define the help function
# ==========================================================
# Script name
SCRIPT_NAME=$(basename "$0")
help_message() {
cat << EOF
Usage: $SCRIPT_NAME [-d TARGET_DIR] [-h|--help]
Description:
Save a predefined list of Docker images as .tar files.
Options:
-d TARGET_DIR Target directory for the saved image archives
(absolute or relative path).
Defaults to: $TARGET_DIR_DEFAULT
-h, --help Show this help message and exit.
Examples:
# Use the default directory (./dist)
$SCRIPT_NAME
# Save to the /tmp/my-archive directory instead
$SCRIPT_NAME -d /tmp/my-archive
EOF
}
# ==========================================================
# Step 2: Define defaults and parse arguments
# ==========================================================
# Default target directory
TARGET_DIR_DEFAULT="./dist"
TARGET_DIR="$TARGET_DIR_DEFAULT"
# Parse command-line options with getopts.
# "d:" means the -d option takes an argument (the target directory).
while getopts "d:h" opt; do
case ${opt} in
d )
# -d given: use its argument as the target directory
TARGET_DIR="$OPTARG"
;;
h )
# -h: show the help message and exit
help_message
exit 0
;;
\? )
# Handle invalid options
echo "Error: invalid option -$OPTARG" >&2
help_message
exit 1
;;
esac
done
# Shift past the parsed options (this script takes no other positional arguments, but shifting is good practice)
shift $((OPTIND -1))
# ==========================================================
# Step 3: Core script logic
# ==========================================================
# Check/create the target directory
if ! mkdir -p "$TARGET_DIR"; then
echo "❌ Fatal: failed to create target directory: $TARGET_DIR" >&2
exit 1
fi
echo "Target directory ready: $TARGET_DIR"
echo "----------------------------------------"
# Image list
images=("frontend:latest" "backend:latest" "runtime:latest" "mysql:8")
for image in "${images[@]}"; do
# Sanitize the image name, replacing ':' with '_' to create a safe file name.
safe_name="${image//[:]/_}"
# Build the full output file path.
output_path="$TARGET_DIR/$safe_name.tar"
echo "Saving image $image"
echo " -> to file $output_path"
# Run docker save
docker save -o "$output_path" "$image"
# Check whether the save succeeded ($? holds the previous command's exit status)
if [ $? -eq 0 ]; then
echo "✅ Saved successfully."
else
echo "❌ Save failed!"
fi
echo ""
done