# Repository viewer header (not part of the OpenAPI document):
# Files: DataMate/backend/openapi/specs/data-synthesis.yaml
# 2025-10-21 23:00:48 +08:00
#
# 621 lines, 14 KiB, YAML

openapi: 3.0.3

# General metadata describing this API document.
info:
  title: Data Synthesis Service API
  description: 数据合成服务API - 指令、COT蒸馏、多模态合成
  version: 1.0.0
  contact:
    name: Data Mate Platform Team

# Base URLs the API is served from.
servers:
  - url: http://localhost:8085
    description: Development server

# Tags used to group the operations declared under `paths`.
tags:
  - name: synthesis-templates
    description: 合成模板管理
  - name: synthesis-jobs
    description: 合成任务管理
  - name: instruction-tuning
    description: 指令调优
  - name: cot-distillation
    description: COT蒸馏
# Operations exposed by the service, keyed by URL path.
paths:
  # Template collection: paged listing (GET) and creation (POST).
  /api/v1/synthesis/templates:
    get:
      tags:
        - synthesis-templates
      summary: 获取合成模板列表
      parameters:
        # Paging parameters; both are optional and carry defaults.
        - name: page
          in: query
          schema:
            type: integer
            default: 0
        - name: size
          in: query
          schema:
            type: integer
            default: 20
        # Optional filter restricted to the SynthesisType enum values.
        - name: type
          in: query
          schema:
            $ref: '#/components/schemas/SynthesisType'
      responses:
        '200':
          description: 获取成功
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SynthesisTemplatePageResponse'
    post:
      tags:
        - synthesis-templates
      summary: 创建合成模板
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateSynthesisTemplateRequest'
      responses:
        '201':
          description: 创建成功
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SynthesisTemplateResponse'

  # Single template addressed by its id: detail (GET) and update (PUT).
  /api/v1/synthesis/templates/{templateId}:
    get:
      tags:
        - synthesis-templates
      summary: 获取合成模板详情
      parameters:
        - name: templateId
          in: path
          required: true
          schema:
            type: string
      responses:
        '200':
          description: 获取成功
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SynthesisTemplateDetailResponse'
    put:
      tags:
        - synthesis-templates
      summary: 更新合成模板
      parameters:
        - name: templateId
          in: path
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/UpdateSynthesisTemplateRequest'
      responses:
        # NOTE(review): no response body is declared here, whereas the create
        # operation returns SynthesisTemplateResponse - confirm this is intended.
        '200':
          description: 更新成功

  # Job collection: paged listing (GET) and creation (POST).
  /api/v1/synthesis/jobs:
    get:
      tags:
        - synthesis-jobs
      summary: 获取合成任务列表
      parameters:
        # Paging parameters; both are optional and carry defaults.
        - name: page
          in: query
          schema:
            type: integer
            default: 0
        - name: size
          in: query
          schema:
            type: integer
            default: 20
        # Optional filter restricted to the JobStatus enum values.
        - name: status
          in: query
          schema:
            $ref: '#/components/schemas/JobStatus'
      responses:
        '200':
          description: 获取成功
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SynthesisJobPageResponse'
    post:
      tags:
        - synthesis-jobs
      summary: 创建合成任务
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateSynthesisJobRequest'
      responses:
        '201':
          description: 任务创建成功
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SynthesisJobResponse'

  # Single job addressed by its id.
  /api/v1/synthesis/jobs/{jobId}:
    get:
      tags:
        - synthesis-jobs
      summary: 获取合成任务详情
      parameters:
        - name: jobId
          in: path
          required: true
          schema:
            type: string
      responses:
        '200':
          description: 获取成功
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SynthesisJobDetailResponse'

  # Execution trigger for an existing job; takes no request body.
  /api/v1/synthesis/jobs/{jobId}/execute:
    post:
      tags:
        - synthesis-jobs
      summary: 执行合成任务
      parameters:
        - name: jobId
          in: path
          required: true
          schema:
            type: string
      responses:
        '200':
          description: 任务开始执行
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/JobExecutionResponse'

  # Instruction-tuning data synthesis.
  /api/v1/synthesis/instruction-tuning:
    post:
      tags:
        - instruction-tuning
      summary: 指令调优数据合成
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/InstructionTuningRequest'
      responses:
        '200':
          description: 合成成功
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/InstructionTuningResponse'

  # Chain-of-thought (COT) distillation data synthesis.
  /api/v1/synthesis/cot-distillation:
    post:
      tags:
        - cot-distillation
      summary: COT蒸馏数据合成
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/COTDistillationRequest'
      responses:
        '200':
          description: 蒸馏成功
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/COTDistillationResponse'
# Reusable definitions referenced from `paths` via $ref.
components:
  schemas:
    # Template representation returned on creation and used as the item type
    # of the template page.
    SynthesisTemplateResponse:
      type: object
      properties:
        id:
          type: string
        name:
          type: string
        description:
          type: string
        type:
          $ref: '#/components/schemas/SynthesisType'
        category:
          type: string
        modelConfig:
          $ref: '#/components/schemas/ModelConfig'
        enabled:
          type: boolean
        createdAt:
          type: string
          format: date-time

    # SynthesisTemplateResponse extended with the prompt template, its
    # parameters and examples.
    SynthesisTemplateDetailResponse:
      allOf:
        - $ref: '#/components/schemas/SynthesisTemplateResponse'
        - type: object
          properties:
            promptTemplate:
              type: string
            parameters:
              type: object
            examples:
              type: array
              items:
                $ref: '#/components/schemas/SynthesisExample'

    # One page of templates plus paging counters.
    SynthesisTemplatePageResponse:
      type: object
      properties:
        content:
          type: array
          items:
            $ref: '#/components/schemas/SynthesisTemplateResponse'
        totalElements:
          type: integer
          format: int64
        totalPages:
          type: integer
        size:
          type: integer
        number:
          type: integer

    # Body for creating a template; name, type and promptTemplate are mandatory.
    CreateSynthesisTemplateRequest:
      type: object
      required:
        - name
        - type
        - promptTemplate
      properties:
        name:
          type: string
        description:
          type: string
        type:
          $ref: '#/components/schemas/SynthesisType'
        category:
          type: string
        promptTemplate:
          type: string
        modelConfig:
          $ref: '#/components/schemas/ModelConfig'
        parameters:
          type: object

    # Body for updating a template; no field is marked as required.
    UpdateSynthesisTemplateRequest:
      type: object
      properties:
        name:
          type: string
        description:
          type: string
        promptTemplate:
          type: string
        enabled:
          type: boolean
        parameters:
          type: object

    # Job representation returned on creation and used as the item type of
    # the job page.
    SynthesisJobResponse:
      type: object
      properties:
        id:
          type: string
        name:
          type: string
        description:
          type: string
        templateId:
          type: string
        status:
          $ref: '#/components/schemas/JobStatus'
        progress:
          type: number
          format: double
        targetCount:
          type: integer
        generatedCount:
          type: integer
        startTime:
          type: string
          format: date-time
        endTime:
          type: string
          format: date-time
        createdAt:
          type: string
          format: date-time

    # SynthesisJobResponse extended with the template, statistics and samples.
    SynthesisJobDetailResponse:
      allOf:
        - $ref: '#/components/schemas/SynthesisJobResponse'
        - type: object
          properties:
            template:
              $ref: '#/components/schemas/SynthesisTemplateResponse'
            statistics:
              $ref: '#/components/schemas/SynthesisStatistics'
            samples:
              type: array
              items:
                $ref: '#/components/schemas/GeneratedSample'

    # One page of jobs plus paging counters.
    SynthesisJobPageResponse:
      type: object
      properties:
        content:
          type: array
          items:
            $ref: '#/components/schemas/SynthesisJobResponse'
        totalElements:
          type: integer
          format: int64
        totalPages:
          type: integer
        size:
          type: integer
        number:
          type: integer

    # Body for creating a job; name, templateId and targetCount are mandatory.
    CreateSynthesisJobRequest:
      type: object
      required:
        - name
        - templateId
        - targetCount
      properties:
        name:
          type: string
        description:
          type: string
        templateId:
          type: string
        targetCount:
          type: integer
        parameters:
          type: object
        seedData:
          type: array
          items:
            type: object

    # Result of triggering a job execution.
    JobExecutionResponse:
      type: object
      properties:
        executionId:
          type: string
        # NOTE(review): plain string here, while SynthesisJobResponse.status
        # uses the JobStatus enum - confirm this is intended.
        status:
          type: string
        message:
          type: string

    # Body for instruction-tuning synthesis; baseInstructions, targetDomain
    # and count are mandatory.
    InstructionTuningRequest:
      type: object
      required:
        - baseInstructions
        - targetDomain
        - count
      properties:
        baseInstructions:
          type: array
          items:
            type: string
        targetDomain:
          type: string
        count:
          type: integer
        modelConfig:
          $ref: '#/components/schemas/ModelConfig'
        parameters:
          type: object

    # Result of instruction-tuning synthesis.
    InstructionTuningResponse:
      type: object
      properties:
        jobId:
          type: string
        generatedInstructions:
          type: array
          items:
            $ref: '#/components/schemas/GeneratedInstruction'
        statistics:
          $ref: '#/components/schemas/GenerationStatistics'

    # Body for COT distillation; sourceModel, targetFormat and examples are
    # mandatory.
    COTDistillationRequest:
      type: object
      required:
        - sourceModel
        - targetFormat
        - examples
      properties:
        sourceModel:
          type: string
        targetFormat:
          type: string
          enum: [QA, INSTRUCTION, REASONING]
        examples:
          type: array
          items:
            $ref: '#/components/schemas/COTExample'
        parameters:
          type: object

    # Result of COT distillation.
    COTDistillationResponse:
      type: object
      properties:
        jobId:
          type: string
        distilledData:
          type: array
          items:
            $ref: '#/components/schemas/DistilledCOTData'
        statistics:
          $ref: '#/components/schemas/DistillationStatistics'

    # Kinds of synthesis a template can declare.
    SynthesisType:
      type: string
      enum:
        - INSTRUCTION_TUNING
        - COT_DISTILLATION
        - DIALOGUE_GENERATION
        - TEXT_AUGMENTATION
        - MULTIMODAL_SYNTHESIS
        - CUSTOM

    # Lifecycle states of a synthesis job.
    JobStatus:
      type: string
      enum:
        - PENDING
        - RUNNING
        - COMPLETED
        - FAILED
        - CANCELLED

    # Model selection and sampling settings attached to templates and requests.
    ModelConfig:
      type: object
      properties:
        modelName:
          type: string
        temperature:
          type: number
          format: double
        maxTokens:
          type: integer
        topP:
          type: number
          format: double
        frequencyPenalty:
          type: number
          format: double

    # Input/output example listed in a template's detail view.
    SynthesisExample:
      type: object
      properties:
        input:
          type: string
        output:
          type: string
        explanation:
          type: string

    # Counters reported in a job's detail view.
    SynthesisStatistics:
      type: object
      properties:
        totalGenerated:
          type: integer
        successfulGenerated:
          type: integer
        failedGenerated:
          type: integer
        averageLength:
          type: number
          format: double
        uniqueCount:
          type: integer

    # Single generated sample listed in a job's detail view.
    GeneratedSample:
      type: object
      properties:
        id:
          type: string
        content:
          type: string
        score:
          type: number
          format: double
        metadata:
          type: object
        createdAt:
          type: string
          format: date-time

    # Single item of InstructionTuningResponse.generatedInstructions.
    GeneratedInstruction:
      type: object
      properties:
        instruction:
          type: string
        input:
          type: string
        output:
          type: string
        quality:
          type: number
          format: double

    # Statistics block of InstructionTuningResponse.
    GenerationStatistics:
      type: object
      properties:
        totalGenerated:
          type: integer
        averageQuality:
          type: number
          format: double
        diversityScore:
          type: number
          format: double

    # Single item of COTDistillationRequest.examples.
    COTExample:
      type: object
      properties:
        question:
          type: string
        reasoning:
          type: string
        answer:
          type: string

    # Single item of COTDistillationResponse.distilledData.
    DistilledCOTData:
      type: object
      properties:
        question:
          type: string
        reasoning:
          type: string
        answer:
          type: string
        confidence:
          type: number
          format: double

    # Statistics block of COTDistillationResponse.
    DistillationStatistics:
      type: object
      properties:
        totalProcessed:
          type: integer
        successfulDistilled:
          type: integer
        averageConfidence:
          type: number
          format: double

  # Authentication schemes that security requirements can reference by name.
  securitySchemes:
    BearerAuth:
      type: http
      scheme: bearer
      bearerFormat: JWT
# Document-level security requirement: the BearerAuth scheme, with an empty
# scope list.
security:
  - BearerAuth: []