Develop labeling module (#25)

* refactor: remove DB table management from the LS adapter (to be moved to scripts later); change the adapter to use the same MySQL DB as the other modules.

* refactor: Rename LS Adapter module to datamate-python
This commit is contained in:
Jinglong Wang
2025-10-27 16:16:14 +08:00
committed by GitHub
parent 46dfb389f1
commit 7f819563db
69 changed files with 1104 additions and 703 deletions

View File

@@ -0,0 +1,13 @@
# app/models/collection/__init__.py
from .task_execution import TaskExecution
from .collection_task import CollectionTask
from .task_log import TaskLog
from .datax_template import DataxTemplate
# Names exported by the collection models package on `from ... import *`.
__all__ = [
    "TaskExecution",
    "CollectionTask",
    "TaskLog",
    "DataxTemplate",
]

View File

@@ -0,0 +1,28 @@
from sqlalchemy import Column, String, Text, Integer, BigInteger, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class CollectionTask(Base):
    """Data collection task model, mapped to ``t_dc_collection_tasks``.

    Each row describes one collection task: its sync mode, configuration
    text, optional cron expression, current status and execution limits.
    Values passed as ``default=`` are applied by SQLAlchemy on INSERT, while
    ``server_default=`` values are part of the column definition.
    """

    __tablename__ = "t_dc_collection_tasks"

    # --- Identity and description ---
    id = Column(String(36), primary_key=True, comment="任务ID(UUID)")
    name = Column(String(255), nullable=False, comment="任务名称")
    description = Column(Text, nullable=True, comment="任务描述")

    # --- Synchronisation settings ---
    # Sync mode is ONCE or SCHEDULED (per the column comment).
    sync_mode = Column(
        String(20),
        default="ONCE",
        comment="同步模式:ONCE/SCHEDULED",
    )
    # Collection configuration stored as text; required.
    config = Column(
        Text,
        nullable=False,
        comment="归集配置(DataX配置),包含源端和目标端配置信息",
    )
    schedule_expression = Column(
        String(255),
        nullable=True,
        comment="Cron调度表达式",
    )

    # --- Lifecycle and limits ---
    status = Column(
        String(20),
        default="DRAFT",
        comment="任务状态:DRAFT/READY/RUNNING/SUCCESS/FAILED/STOPPED",
    )
    retry_count = Column(Integer, default=3, comment="重试次数")
    timeout_seconds = Column(Integer, default=3600, comment="超时时间(秒)")
    max_records = Column(BigInteger, nullable=True, comment="最大处理记录数")
    # Field used for incremental collection (per the column comment).
    sort_field = Column(String(100), nullable=True, comment="增量字段")
    last_execution_id = Column(
        String(36),
        nullable=True,
        comment="最后执行ID(UUID)",
    )

    # --- Audit columns ---
    created_at = Column(
        TIMESTAMP,
        server_default=func.current_timestamp(),
        comment="创建时间",
    )
    # Also carries an ``onupdate`` of CURRENT_TIMESTAMP.
    updated_at = Column(
        TIMESTAMP,
        server_default=func.current_timestamp(),
        onupdate=func.current_timestamp(),
        comment="更新时间",
    )
    created_by = Column(String(255), nullable=True, comment="创建者")
    updated_by = Column(String(255), nullable=True, comment="更新者")

    def __repr__(self):
        """Return a debug representation showing id, name and status."""
        return f"<CollectionTask(id={self.id}, name={self.name}, status={self.status})>"

View File

@@ -0,0 +1,23 @@
from sqlalchemy import Column, String, Text, Boolean, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class DataxTemplate(Base):
    """DataX template configuration model, mapped to ``t_dc_datax_templates``.

    Each row stores one uniquely named template together with the source and
    target data-source types it applies to and the template content itself.
    """

    __tablename__ = "t_dc_datax_templates"

    # --- Identity ---
    id = Column(String(36), primary_key=True, comment="模板ID(UUID)")
    # Template names carry a unique constraint.
    name = Column(
        String(255),
        nullable=False,
        unique=True,
        comment="模板名称",
    )

    # --- Template definition ---
    source_type = Column(String(50), nullable=False, comment="源数据源类型")
    target_type = Column(String(50), nullable=False, comment="目标数据源类型")
    template_content = Column(Text, nullable=False, comment="模板内容")
    description = Column(Text, nullable=True, comment="模板描述")
    version = Column(String(20), default="1.0.0", comment="版本号")
    # Flags system templates (per the column comment); defaults to False.
    is_system = Column(Boolean, default=False, comment="是否系统模板")

    # --- Audit columns ---
    created_at = Column(
        TIMESTAMP,
        server_default=func.current_timestamp(),
        comment="创建时间",
    )
    # Also carries an ``onupdate`` of CURRENT_TIMESTAMP.
    updated_at = Column(
        TIMESTAMP,
        server_default=func.current_timestamp(),
        onupdate=func.current_timestamp(),
        comment="更新时间",
    )
    created_by = Column(String(255), nullable=True, comment="创建者")

    def __repr__(self):
        """Return a debug representation showing id, name, source and target types."""
        return f"<DataxTemplate(id={self.id}, name={self.name}, source={self.source_type}, target={self.target_type})>"

View File

@@ -0,0 +1,34 @@
from sqlalchemy import Column, String, Text, Integer, BigInteger, DECIMAL, JSON, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class TaskExecution(Base):
    """Task execution detail model, mapped to ``t_dc_task_executions``.

    Each row records one execution of a task: status, progress, record
    counters, throughput, timing, the configuration used and any
    error or result text.
    """

    __tablename__ = "t_dc_task_executions"

    # --- Identity ---
    id = Column(String(36), primary_key=True, comment="执行记录ID(UUID)")
    # Plain string column; no ForeignKey constraint is declared here.
    task_id = Column(String(36), nullable=False, comment="任务ID")
    task_name = Column(String(255), nullable=False, comment="任务名称")

    # --- Status and progress ---
    status = Column(
        String(20),
        default="RUNNING",
        comment="执行状态:RUNNING/SUCCESS/FAILED/STOPPED",
    )
    # Progress percentage stored as DECIMAL(5, 2).
    progress = Column(DECIMAL(5, 2), default=0.00, comment="进度百分比")

    # --- Record counters and volume ---
    records_total = Column(BigInteger, default=0, comment="总记录数")
    records_processed = Column(BigInteger, default=0, comment="已处理记录数")
    records_success = Column(BigInteger, default=0, comment="成功记录数")
    records_failed = Column(BigInteger, default=0, comment="失败记录数")
    # Throughput in records per second (per the column comment).
    throughput = Column(DECIMAL(10, 2), default=0.00, comment="吞吐量(条/秒)")
    data_size_bytes = Column(BigInteger, default=0, comment="数据量(字节)")

    # --- Timing ---
    started_at = Column(TIMESTAMP, nullable=True, comment="开始时间")
    completed_at = Column(TIMESTAMP, nullable=True, comment="完成时间")
    duration_seconds = Column(Integer, default=0, comment="执行时长(秒)")

    # --- Configuration and outcome ---
    config = Column(JSON, nullable=True, comment="执行配置")
    error_message = Column(Text, nullable=True, comment="错误信息")
    datax_job_id = Column(Text, nullable=True, comment="datax任务ID")
    result = Column(Text, nullable=True, comment="执行结果")

    # --- Audit columns ---
    created_at = Column(
        TIMESTAMP,
        server_default=func.current_timestamp(),
        comment="创建时间",
    )
    # Also carries an ``onupdate`` of CURRENT_TIMESTAMP.
    updated_at = Column(
        TIMESTAMP,
        server_default=func.current_timestamp(),
        onupdate=func.current_timestamp(),
        comment="更新时间",
    )
    created_by = Column(String(255), nullable=True, comment="创建者")
    updated_by = Column(String(255), nullable=True, comment="更新者")

    def __repr__(self):
        """Return a debug representation showing id, task_id and status."""
        return f"<TaskExecution(id={self.id}, task_id={self.task_id}, status={self.status})>"

View File

@@ -0,0 +1,26 @@
from sqlalchemy import Column, String, Text, Integer, BigInteger, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class TaskLog(Base):
    """Task execution log model, mapped to ``t_dc_task_log``.

    Each row records one run of a task: sync mode, status, start and end
    times, process id, log file path, error and result text, and the
    number of retries.
    """

    __tablename__ = "t_dc_task_log"

    # --- Identity ---
    id = Column(String(36), primary_key=True, comment="执行记录ID(UUID)")
    # Plain string column; no ForeignKey constraint is declared here.
    task_id = Column(String(36), nullable=False, comment="任务ID")
    task_name = Column(String(255), nullable=False, comment="任务名称")

    # --- Mode and status ---
    # Sync mode is FULL or INCREMENTAL (per the column comment).
    sync_mode = Column(
        String(20),
        default="FULL",
        comment="同步模式:FULL/INCREMENTAL",
    )
    status = Column(
        String(20),
        default="RUNNING",
        comment="执行状态:RUNNING/SUCCESS/FAILED/STOPPED",
    )

    # --- Timing ---
    start_time = Column(TIMESTAMP, nullable=True, comment="开始时间")
    end_time = Column(TIMESTAMP, nullable=True, comment="结束时间")
    # Duration in milliseconds (per the column comment).
    duration = Column(BigInteger, nullable=True, comment="执行时长(毫秒)")

    # --- Process and outcome ---
    process_id = Column(String(50), nullable=True, comment="进程ID")
    log_path = Column(String(500), nullable=True, comment="日志文件路径")
    error_msg = Column(Text, nullable=True, comment="错误信息")
    result = Column(Text, nullable=True, comment="执行结果")
    retry_times = Column(Integer, default=0, comment="重试次数")

    # --- Audit column ---
    create_time = Column(
        TIMESTAMP,
        server_default=func.current_timestamp(),
        comment="创建时间",
    )

    def __repr__(self):
        """Return a debug representation showing id, task_id and status."""
        return f"<TaskLog(id={self.id}, task_id={self.task_id}, status={self.status})>"