You've already forked DataMate
实现边分割边上传的流式处理,避免大文件一次性加载导致前端卡顿。

修改内容:
1. file.util.ts - 流式分割上传核心功能
   - 新增 streamSplitAndUpload 函数,实现边分割边上传
   - 新增 shouldStreamUpload 函数,判断是否使用流式上传
   - 新增 StreamUploadOptions 和 StreamUploadResult 接口
   - 优化分片大小(默认 5MB)
2. ImportConfiguration.tsx - 智能上传策略
   - 大文件(>5MB)使用流式分割上传
   - 小文件(≤5MB)使用传统分割方式
   - 保持 UI 不变
3. useSliceUpload.tsx - 流式上传处理
   - 新增 handleStreamUpload 处理流式上传事件
   - 支持并发上传和更好的进度管理
4. TaskUpload.tsx - 进度显示优化
   - 注册流式上传事件监听器
   - 显示流式上传信息(已上传行数、当前文件等)
5. dataset.model.ts - 类型定义扩展
   - 新增 StreamUploadInfo 接口
   - TaskItem 接口添加 streamUploadInfo 和 prefix 字段

实现特点:
- 流式读取:使用 Blob.slice 逐块读取,避免一次性加载
- 逐行检测:按换行符分割,形成完整行后立即上传
- 内存优化:buffer 只保留当前块和未完成行,不累积所有分割结果
- 并发控制:支持 3 个并发上传,提升效率
- 进度可见:实时显示已上传行数和总体进度
- 错误处理:单个文件上传失败不影响其他文件
- 向后兼容:小文件仍使用原有分割方式

优势:
- 大文件上传不再卡顿,用户体验大幅提升
- 内存占用显著降低(从加载整个文件到只保留当前块)
- 上传效率提升(边分割边上传,并发上传多个小文件)

相关文件:
- frontend/src/utils/file.util.ts
- frontend/src/pages/DataManagement/Detail/components/ImportConfiguration.tsx
- frontend/src/hooks/useSliceUpload.tsx
- frontend/src/pages/Layout/TaskUpload.tsx
- frontend/src/pages/DataManagement/dataset.model.ts
126 lines
2.6 KiB
TypeScript
126 lines
2.6 KiB
TypeScript
/**
 * Type identifiers for a dataset.
 *
 * A string enum: every member's runtime value is identical to its name,
 * so values survive JSON serialization unchanged.
 */
export enum DatasetType {
  TEXT = "TEXT",
  IMAGE = "IMAGE",
  AUDIO = "AUDIO",
  VIDEO = "VIDEO",
}
|
|
|
|
/**
 * Sub-type identifiers for a dataset.
 *
 * Each member name starts with one of the `DatasetType` member names
 * (`TEXT_`, `IMAGE_`, `AUDIO_`, `VIDEO_`). Like the other enums in this
 * file, every runtime value is identical to the member name.
 */
export enum DatasetSubType {
  TEXT_DOCUMENT = "TEXT_DOCUMENT",
  TEXT_WEB = "TEXT_WEB",
  TEXT_DIALOG = "TEXT_DIALOG",
  IMAGE_IMAGE = "IMAGE_IMAGE",
  IMAGE_CAPTION = "IMAGE_CAPTION",
  AUDIO_AUDIO = "AUDIO_AUDIO",
  AUDIO_JSONL = "AUDIO_JSONL",
  VIDEO_VIDEO = "VIDEO_VIDEO",
  VIDEO_JSONL = "VIDEO_JSONL",
}
|
|
|
|
/**
 * Status values a dataset can carry (see `Dataset.status`).
 *
 * A string enum whose runtime values are identical to the member names.
 */
export enum DatasetStatus {
  ACTIVE = "ACTIVE",
  INACTIVE = "INACTIVE",
  PROCESSING = "PROCESSING",
  DRAFT = "DRAFT",
}
|
|
|
|
/**
 * Identifiers for where dataset data comes from or goes to
 * (used by `DatasetImportConfig.source` / `target`).
 *
 * A string enum whose runtime values are identical to the member names.
 * NOTE(review): NAS and OBS presumably stand for network-attached storage
 * and an object storage service — confirm against the backend.
 */
export enum DataSource {
  UPLOAD = "UPLOAD",
  COLLECTION = "COLLECTION",
  DATABASE = "DATABASE",
  NAS = "NAS",
  OBS = "OBS",
}
|
|
|
|
/**
 * A single file that belongs to a dataset.
 *
 * NOTE(review): both `path` (required) and `filePath` (optional) exist;
 * the difference between them is not visible in this file — confirm
 * against the API before relying on either.
 */
export interface DatasetFile {
  id: string;
  /** Identifier of the owning dataset, when known. */
  datasetId?: string;
  fileName: string;
  /** File size as a string (unit/format not defined in this file). */
  size: string;
  /** Upload date as a string (format not defined in this file). */
  uploadDate: string;
  path: string;
  filePath?: string;
}
|
|
|
|
/**
 * A dataset record as used by the data-management pages.
 */
export interface Dataset {
  id: string;
  name: string;
  description: string;
  /** Identifier of a parent dataset, if this dataset has one. */
  parentDatasetId?: string;
  datasetType: DatasetType;
  status: DatasetStatus;
  /** Dataset size as a string (unit/format not defined in this file). */
  size?: string;
  itemCount?: number;
  fileCount?: number;
  createdBy: string;
  /** Creation time as a string (format not defined in this file). */
  createdAt: string;
  /** Last-update time as a string (format not defined in this file). */
  updatedAt: string;
  tags: string[];
  targetLocation?: string;
  /**
   * Two-level map of numeric values keyed by string.
   * NOTE(review): the meaning of the outer and inner keys is not visible
   * here — confirm against the API.
   */
  distribution?: Record<string, Record<string, number>>;
}
|
|
|
|
/**
 * Configuration for importing data into a dataset.
 *
 * Besides the named fields, arbitrary extra keys are allowed through the
 * index signature as long as their values are primitives, `null` or
 * `undefined`.
 */
export interface DatasetImportConfig {
  /** Import source: a `DataSource` member or a free-form string. */
  source?: DataSource | string;
  /** Import target: a `DataSource` member or a free-form string. */
  target?: DataSource | string;
  dataSource?: string;
  /**
   * NOTE(review): presumably requests splitting uploaded text files into
   * one item per line — confirm against the import code.
   */
  splitByLine?: boolean;
  /** NOTE(review): presumably flags that the upload contains an archive — confirm. */
  hasArchive?: boolean;
  [key: string]: string | number | boolean | null | undefined;
}
|
|
|
|
/**
 * A tag definition with its display attributes.
 */
export interface TagItem {
  id: string;
  name: string;
  /** Tag colour as a string (accepted formats are not defined in this file). */
  color: string;
  description: string;
}
|
|
|
|
/**
 * Scheduling settings for a task.
 *
 * Only `type` is required; all other fields are optional.
 * NOTE(review): which optional fields apply to which `scheduleType` is
 * not enforced by this type — confirm against the scheduling code.
 */
export interface ScheduleConfig {
  /** Whether the task is `"immediate"` or `"scheduled"`. */
  type: "immediate" | "scheduled";
  scheduleType?: "daily" | "weekly" | "monthly" | "custom";
  /** Time as a string (format not defined in this file). */
  time?: string;
  dayOfWeek?: string;
  dayOfMonth?: string;
  cronExpression?: string;
  maxExecutions?: number;
  executionCount?: number;
}
|
|
|
|
/**
 * A dataset import task together with its import and schedule settings.
 */
export interface DatasetTask {
  id: string;
  name: string;
  description: string;
  type: string;
  status: "importing" | "waiting" | "completed" | "failed";
  /** Task progress as a number (scale, e.g. 0-100, is not defined in this file). */
  progress: number;
  /** Creation time as a string (format not defined in this file). */
  createdAt: string;
  importConfig: DatasetImportConfig;
  scheduleConfig: ScheduleConfig;
  nextExecution?: string;
  lastExecution?: string;
  /** Past executions, each recorded as a time string and a status string. */
  executionHistory?: { time: string; status: string }[];
}
|
|
|
|
/**
 * Progress details for a streaming upload, attached to a task through
 * `TaskItem.streamUploadInfo`.
 */
export interface StreamUploadInfo {
  /** Name of the file currently being processed. */
  currentFile: string;
  /** NOTE(review): index of the current file; whether it is 0- or 1-based is not visible here. */
  fileIndex: number;
  /** Total number of files. */
  totalFiles: number;
  /** Number of lines uploaded so far. */
  uploadedLines: number;
}
|
|
|
|
/**
 * An upload task entry, carrying its progress and the handles used to
 * cancel it.
 */
export interface TaskItem {
  key: string;
  title: string;
  /** Progress value (scale, e.g. 0-100, is not defined in this file). */
  percent: number;
  reqId: number;
  isCancel?: boolean;
  /** Abort controller associated with this task. */
  controller: AbortController;
  /** Optional callback for cancelling the task. */
  cancelFn?: () => void;
  /** NOTE(review): presumably the name of the event fired when the task updates — confirm. */
  updateEvent?: string;
  size?: number;
  hasArchive?: boolean;
  prefix?: string;
  /** Streaming-upload progress details, when the task uses a streaming upload. */
  streamUploadInfo?: StreamUploadInfo;
}
|