feat: File and Annotation 2-way sync implementation (#63)

* feat: Refactor configuration and sync logic for improved dataset handling and logging

* feat: Enhance annotation synchronization and dataset file management

- Added a new field `tags_updated_at` to the `DatasetFiles` model for tracking the last update time of tags.
- Implemented new asynchronous methods in the Label Studio client for fetching, creating, updating, and deleting task annotations.
- Introduced bidirectional synchronization for annotations between DataMate and Label Studio, allowing for flexible data management.
- Updated sync service to handle annotation conflicts based on timestamps, ensuring data integrity during synchronization.
- Enhanced dataset file response model to include tags and their update timestamps.
- Modified database initialization script to create a new column for `tags_updated_at` in the dataset files table.
- Updated requirements to ensure compatibility with the latest dependencies.
This commit is contained in:
Jason Wang
2025-11-07 15:03:07 +08:00
committed by GitHub
parent d136bad38c
commit 78f50ea520
16 changed files with 1336 additions and 290 deletions

View File

@@ -1,8 +1,6 @@
from pydantic_settings import BaseSettings
from pydantic import model_validator
from typing import Optional, List
import os
from pathlib import Path
from typing import Optional
class Settings(BaseSettings):
"""应用程序配置"""
@@ -10,39 +8,34 @@ class Settings(BaseSettings):
class Config:
env_file = ".env"
case_sensitive = False
extra = 'ignore' # 允许额外字段(如 Shell 脚本专用的环境变量)
extra = 'ignore'
# =========================
# Adapter 服务配置
# =========================
app_name: str = "Label Studio Adapter"
# Service
app_name: str = "DataMate Python Backend"
app_version: str = "1.0.0"
app_description: str = "Adapter for integrating Data Management System with Label Studio"
# 日志配置
log_level: str = "INFO"
debug: bool = True
log_file_dir: str = "/var/log/datamate"
# 服务器配置
host: str = "0.0.0.0"
port: int = 8000
# CORS配置
port: int = 18000
# CORS
# allowed_origins: List[str] = ["*"]
# allowed_methods: List[str] = ["*"]
# allowed_headers: List[str] = ["*"]
# MySQL数据库配置 (优先级1)
# Log
log_level: str = "INFO"
debug: bool = True
log_file_dir: str = "/var/log/datamate"
# Database
mysql_host: str = "datamate-database"
mysql_port: int = 3306
mysql_user: str = "root"
mysql_password: str = "password"
mysql_database: str = "datamate"
# 直接数据库URL配置(如果提供,将覆盖上述配置)
# 初始值为空字符串,在 model_validator 中会被设置为完整的 URL
database_url: str = ""
database_url: str = "" # Will be overridden by build_database_url() if not provided
@model_validator(mode='after')
def build_database_url(self):
@@ -55,22 +48,18 @@ class Settings(BaseSettings):
return self
# =========================
# Label Studio 服务配置
# =========================
# Label Studio
label_studio_base_url: str = "http://label-studio:8000"
label_studio_username: Optional[str] = "admin@demo.com" # Label Studio 用户名(用于登录)
label_studio_password: Optional[str] = "demoadmin" # Label Studio 密码(用于登录)
label_studio_username: Optional[str] = "admin@demo.com"
label_studio_password: Optional[str] = "demoadmin"
label_studio_user_token: Optional[str] = "abc123abc123" # Legacy Token
label_studio_local_storage_dataset_base_path: str = "/label-studio/local" # Label Studio容器中的本地存储基础路径
label_studio_file_path_prefix: str = "/data/local-files/?d=" # Label Studio本地文件服务路径前缀
label_studio_local_document_root: str = "/label-studio/local" # Label Studio local file storage path
label_studio_file_path_prefix: str = "/data/local-files/?d=" # Label Studio local file serving URL prefix
ls_task_page_size: int = 1000
# =========================
# Data Management 服务配置
# =========================
# DataMate
dm_file_path_prefix: str = "/dataset" # DM存储文件夹前缀
# 全局设置实例