You've already forked DataMate
feature: deer-flow支持从datamate获取外部接入模型 (#83)
* feature: deer-flow支持从datamate获取外部接入模型
This commit is contained in:
@@ -4,13 +4,11 @@ on:
|
|||||||
push:
|
push:
|
||||||
branches: [ "main" ]
|
branches: [ "main" ]
|
||||||
paths:
|
paths:
|
||||||
- 'backend/**'
|
|
||||||
- 'scripts/images/datamate-python/**'
|
- 'scripts/images/datamate-python/**'
|
||||||
- '.github/workflows/docker-image-backend-python.yml'
|
- '.github/workflows/docker-image-backend-python.yml'
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [ "main" ]
|
branches: [ "main" ]
|
||||||
paths:
|
paths:
|
||||||
- 'backend/**'
|
|
||||||
- 'scripts/images/datamate-python/**'
|
- 'scripts/images/datamate-python/**'
|
||||||
- '.github/workflows/docker-image-backend-python.yml'
|
- '.github/workflows/docker-image-backend-python.yml'
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|||||||
6
Makefile
6
Makefile
@@ -174,7 +174,11 @@ mineru-k8s-uninstall:
|
|||||||
|
|
||||||
.PHONY: datamate-docker-install
|
.PHONY: datamate-docker-install
|
||||||
datamate-docker-install:
|
datamate-docker-install:
|
||||||
cd deployment/docker/datamate && export REGISTRY=$(REGISTRY) && docker compose -f docker-compose.yml up -d
|
@if docker compose ls --filter name=deer-flow | grep -q deer-flow; then \
|
||||||
|
cd deployment/docker/datamate && export NGINX_CONF="./backend-with-deer-flow.conf" && export REGISTRY=$(REGISTRY) && docker compose -f docker-compose.yml up -d; \
|
||||||
|
else \
|
||||||
|
cd deployment/docker/datamate && export REGISTRY=$(REGISTRY) && docker compose -f docker-compose.yml up -d; \
|
||||||
|
fi
|
||||||
|
|
||||||
.PHONY: datamate-docker-uninstall
|
.PHONY: datamate-docker-uninstall
|
||||||
datamate-docker-uninstall:
|
datamate-docker-uninstall:
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import java.util.ArrayList;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
|
import com.datamate.common.interfaces.PagingQuery;
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
import lombok.Setter;
|
import lombok.Setter;
|
||||||
|
|
||||||
@@ -13,12 +14,7 @@ import lombok.Setter;
|
|||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
@Setter
|
@Setter
|
||||||
public class OperatorsListPostRequest {
|
public class OperatorsListPostRequest extends PagingQuery {
|
||||||
|
|
||||||
private Integer page;
|
|
||||||
|
|
||||||
private Integer size;
|
|
||||||
|
|
||||||
private List<String> categories = new ArrayList<>();
|
private List<String> categories = new ArrayList<>();
|
||||||
|
|
||||||
private String operatorName;
|
private String operatorName;
|
||||||
|
|||||||
@@ -58,9 +58,10 @@ public class ModelConfigApplicationService {
|
|||||||
.baseUrl(modelConfig.getBaseUrl())
|
.baseUrl(modelConfig.getBaseUrl())
|
||||||
.apiKey(modelConfig.getApiKey())
|
.apiKey(modelConfig.getApiKey())
|
||||||
.isEnabled(true)
|
.isEnabled(true)
|
||||||
|
.isDefault(modelConfig.getIsDefault())
|
||||||
.build();
|
.build();
|
||||||
ModelClient.checkHealth(newConfig);
|
ModelClient.checkHealth(newConfig);
|
||||||
modelConfigRepository.save(newConfig);
|
modelConfigRepository.saveAndSetDefault(newConfig);
|
||||||
return newConfig;
|
return newConfig;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -74,7 +75,7 @@ public class ModelConfigApplicationService {
|
|||||||
modelConfig.setApiKey(updateModelRequest.getApiKey());
|
modelConfig.setApiKey(updateModelRequest.getApiKey());
|
||||||
modelConfig.setIsEnabled(true);
|
modelConfig.setIsEnabled(true);
|
||||||
ModelClient.checkHealth(modelConfig);
|
ModelClient.checkHealth(modelConfig);
|
||||||
modelConfigRepository.updateById(modelConfig);
|
modelConfigRepository.updateAndSetDefault(modelConfig, updateModelRequest.getIsDefault());
|
||||||
return modelConfig;
|
return modelConfig;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -42,4 +42,9 @@ public class ModelConfig extends BaseEntity<String> {
|
|||||||
* 是否启用:1-启用,0-禁用
|
* 是否启用:1-启用,0-禁用
|
||||||
*/
|
*/
|
||||||
private Boolean isEnabled;
|
private Boolean isEnabled;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 是否默认:1-默认,0-非默认
|
||||||
|
*/
|
||||||
|
private Boolean isDefault;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package com.datamate.common.setting.domain.repository;
|
|||||||
import com.baomidou.mybatisplus.core.metadata.IPage;
|
import com.baomidou.mybatisplus.core.metadata.IPage;
|
||||||
import com.baomidou.mybatisplus.extension.repository.IRepository;
|
import com.baomidou.mybatisplus.extension.repository.IRepository;
|
||||||
import com.datamate.common.setting.domain.entity.ModelConfig;
|
import com.datamate.common.setting.domain.entity.ModelConfig;
|
||||||
|
import com.datamate.common.setting.domain.entity.ModelType;
|
||||||
import com.datamate.common.setting.interfaces.rest.dto.QueryModelRequest;
|
import com.datamate.common.setting.interfaces.rest.dto.QueryModelRequest;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -19,4 +20,10 @@ public interface ModelConfigRepository extends IRepository<ModelConfig> {
|
|||||||
* @return 模型配置列表
|
* @return 模型配置列表
|
||||||
*/
|
*/
|
||||||
IPage<ModelConfig> page(QueryModelRequest queryModelRequest);
|
IPage<ModelConfig> page(QueryModelRequest queryModelRequest);
|
||||||
|
|
||||||
|
void saveAndSetDefault(ModelConfig modelConfig);
|
||||||
|
|
||||||
|
void removeDefault(ModelType type);
|
||||||
|
|
||||||
|
void updateAndSetDefault(ModelConfig modelConfig, Boolean isDefault);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,15 +1,18 @@
|
|||||||
package com.datamate.common.setting.infrastructure.persistence.impl;
|
package com.datamate.common.setting.infrastructure.persistence.impl;
|
||||||
|
|
||||||
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
|
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
|
||||||
|
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
|
||||||
import com.baomidou.mybatisplus.core.metadata.IPage;
|
import com.baomidou.mybatisplus.core.metadata.IPage;
|
||||||
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
|
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
|
||||||
import com.baomidou.mybatisplus.extension.repository.CrudRepository;
|
import com.baomidou.mybatisplus.extension.repository.CrudRepository;
|
||||||
import com.datamate.common.setting.domain.entity.ModelConfig;
|
import com.datamate.common.setting.domain.entity.ModelConfig;
|
||||||
|
import com.datamate.common.setting.domain.entity.ModelType;
|
||||||
import com.datamate.common.setting.domain.repository.ModelConfigRepository;
|
import com.datamate.common.setting.domain.repository.ModelConfigRepository;
|
||||||
import com.datamate.common.setting.infrastructure.persistence.mapper.ModelConfigMapper;
|
import com.datamate.common.setting.infrastructure.persistence.mapper.ModelConfigMapper;
|
||||||
import com.datamate.common.setting.interfaces.rest.dto.QueryModelRequest;
|
import com.datamate.common.setting.interfaces.rest.dto.QueryModelRequest;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import org.springframework.stereotype.Repository;
|
import org.springframework.stereotype.Repository;
|
||||||
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
import org.springframework.util.StringUtils;
|
import org.springframework.util.StringUtils;
|
||||||
|
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
@@ -32,6 +35,36 @@ public class ModelConfigRepositoryImpl extends CrudRepository<ModelConfigMapper,
|
|||||||
return this.page(page, new LambdaQueryWrapper<ModelConfig>()
|
return this.page(page, new LambdaQueryWrapper<ModelConfig>()
|
||||||
.eq(StringUtils.hasText(queryModelRequest.getProvider()), ModelConfig::getProvider, queryModelRequest.getProvider())
|
.eq(StringUtils.hasText(queryModelRequest.getProvider()), ModelConfig::getProvider, queryModelRequest.getProvider())
|
||||||
.eq(Objects.nonNull(queryModelRequest.getType()), ModelConfig::getType, queryModelRequest.getType())
|
.eq(Objects.nonNull(queryModelRequest.getType()), ModelConfig::getType, queryModelRequest.getType())
|
||||||
.eq(Objects.nonNull(queryModelRequest.getIsEnabled()), ModelConfig::getIsEnabled, queryModelRequest.getIsEnabled()));
|
.eq(Objects.nonNull(queryModelRequest.getIsEnabled()), ModelConfig::getIsEnabled, queryModelRequest.getIsEnabled())
|
||||||
|
.eq(Objects.nonNull(queryModelRequest.getIsDefault()), ModelConfig::getIsDefault, queryModelRequest.getIsDefault()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Transactional
|
||||||
|
public void saveAndSetDefault(ModelConfig modelConfig) {
|
||||||
|
LambdaQueryWrapper<ModelConfig> queryWrapper = new LambdaQueryWrapper<>();
|
||||||
|
queryWrapper.eq(ModelConfig::getType, modelConfig.getType()).eq(ModelConfig::getIsDefault, true);
|
||||||
|
if (this.getOne(queryWrapper) == null) {
|
||||||
|
modelConfig.setIsDefault(true);
|
||||||
|
} else {
|
||||||
|
removeDefault(modelConfig.getType());
|
||||||
|
}
|
||||||
|
super.save(modelConfig);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void removeDefault(ModelType type) {
|
||||||
|
LambdaUpdateWrapper<ModelConfig> updateWrapper = new LambdaUpdateWrapper<>();
|
||||||
|
updateWrapper.set(ModelConfig::getIsDefault, false)
|
||||||
|
.eq(ModelConfig::getType, type)
|
||||||
|
.eq(ModelConfig::getIsDefault, true);
|
||||||
|
this.update(updateWrapper);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Transactional
|
||||||
|
public void updateAndSetDefault(ModelConfig modelConfig, Boolean isDefault) {
|
||||||
|
if (!modelConfig.getIsDefault() && isDefault) {
|
||||||
|
removeDefault(modelConfig.getType());
|
||||||
|
}
|
||||||
|
modelConfig.setIsDefault(isDefault);
|
||||||
|
this.updateById(modelConfig);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -43,4 +43,9 @@ public class CreateModelRequest {
|
|||||||
* 是否启用:1-启用,0-禁用
|
* 是否启用:1-启用,0-禁用
|
||||||
*/
|
*/
|
||||||
private Boolean isEnabled;
|
private Boolean isEnabled;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 是否默认:1-默认,0-非默认
|
||||||
|
*/
|
||||||
|
private Boolean isDefault;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,4 +24,6 @@ public class QueryModelRequest extends PagingQuery {
|
|||||||
private ModelType type;
|
private ModelType type;
|
||||||
|
|
||||||
private Boolean isEnabled;
|
private Boolean isEnabled;
|
||||||
|
|
||||||
|
private Boolean isDefault;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import {
|
|||||||
Table,
|
Table,
|
||||||
Tooltip,
|
Tooltip,
|
||||||
Popconfirm,
|
Popconfirm,
|
||||||
message,
|
message, Switch,
|
||||||
} from "antd";
|
} from "antd";
|
||||||
import {
|
import {
|
||||||
EditOutlined,
|
EditOutlined,
|
||||||
@@ -374,6 +374,14 @@ export default function EnvironmentAccess() {
|
|||||||
>
|
>
|
||||||
<Select options={typeOptions} placeholder="选择模型类型"></Select>
|
<Select options={typeOptions} placeholder="选择模型类型"></Select>
|
||||||
</Form.Item>
|
</Form.Item>
|
||||||
|
<Form.Item
|
||||||
|
name="isDefault"
|
||||||
|
label="设为默认"
|
||||||
|
required
|
||||||
|
tooltip="当模型类型下仅有一个模型服务时,自动将其设为默认值。"
|
||||||
|
>
|
||||||
|
<Switch />
|
||||||
|
</Form.Item>
|
||||||
</Form>
|
</Form>
|
||||||
</Modal>
|
</Modal>
|
||||||
</>
|
</>
|
||||||
|
|||||||
@@ -1,177 +0,0 @@
|
|||||||
diff --git a/src/rag/milvus.py b/src/rag/milvus.py
|
|
||||||
index de589d4..c1b9b98 100644
|
|
||||||
--- a/src/rag/milvus.py
|
|
||||||
+++ b/src/rag/milvus.py
|
|
||||||
@@ -9,7 +9,7 @@ from typing import Any, Dict, Iterable, List, Optional, Sequence, Set
|
|
||||||
from langchain_milvus.vectorstores import Milvus as LangchainMilvus
|
|
||||||
from langchain_openai import OpenAIEmbeddings
|
|
||||||
from openai import OpenAI
|
|
||||||
-from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusClient
|
|
||||||
+from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusClient, utility
|
|
||||||
|
|
||||||
from src.config.loader import get_bool_env, get_int_env, get_str_env
|
|
||||||
from src.rag.retriever import Chunk, Document, Resource, Retriever
|
|
||||||
@@ -397,6 +397,36 @@ class MilvusRetriever(Retriever):
|
|
||||||
except Exception as e:
|
|
||||||
raise ConnectionError(f"Failed to connect to Milvus: {str(e)}")
|
|
||||||
|
|
||||||
+ def _connect_with_collection(self, collection_name) -> None:
|
|
||||||
+ """Create the underlying Milvus client (idempotent)."""
|
|
||||||
+ try:
|
|
||||||
+ # Check if using Milvus Lite (file-based) vs server-based Milvus
|
|
||||||
+ if self._is_milvus_lite():
|
|
||||||
+ # Use MilvusClient for Milvus Lite (local file database)
|
|
||||||
+ self.client = MilvusClient(self.uri)
|
|
||||||
+ # Ensure collection exists
|
|
||||||
+ self._ensure_collection_exists()
|
|
||||||
+ else:
|
|
||||||
+ connection_args = {
|
|
||||||
+ "uri": self.uri,
|
|
||||||
+ }
|
|
||||||
+ # Add user/password only if provided
|
|
||||||
+ if self.user:
|
|
||||||
+ connection_args["user"] = self.user
|
|
||||||
+ if self.password:
|
|
||||||
+ connection_args["password"] = self.password
|
|
||||||
+
|
|
||||||
+ # Create LangChain client (it will handle collection creation automatically)
|
|
||||||
+ self.client = LangchainMilvus(
|
|
||||||
+ embedding_function=self.embedding_model,
|
|
||||||
+ collection_name=collection_name,
|
|
||||||
+ connection_args=connection_args,
|
|
||||||
+ # optional (if collection already exists with different schema, be careful)
|
|
||||||
+ drop_old=False,
|
|
||||||
+ )
|
|
||||||
+ except Exception as e:
|
|
||||||
+ raise ConnectionError(f"Failed to connect to Milvus: {str(e)}")
|
|
||||||
+
|
|
||||||
def _is_milvus_lite(self) -> bool:
|
|
||||||
"""Return True if the URI points to a local Milvus Lite file.
|
|
||||||
Milvus Lite uses local file paths (often ``*.db``) without an HTTP/HTTPS
|
|
||||||
@@ -476,26 +506,12 @@ class MilvusRetriever(Retriever):
|
|
||||||
else:
|
|
||||||
# Use similarity_search_by_vector for lightweight listing.
|
|
||||||
# If a query is provided embed it; else use a zero vector.
|
|
||||||
- docs: Iterable[Any] = self.client.similarity_search(
|
|
||||||
- query,
|
|
||||||
- k=100,
|
|
||||||
- expr="source == 'examples'", # Limit to 100 results
|
|
||||||
- )
|
|
||||||
- for d in docs:
|
|
||||||
- meta = getattr(d, "metadata", {}) or {}
|
|
||||||
- # check if the resource is in the list of resources
|
|
||||||
- if resources and any(
|
|
||||||
- r.uri == meta.get(self.url_field, "")
|
|
||||||
- or r.uri == f"milvus://{meta.get(self.id_field, '')}"
|
|
||||||
- for r in resources
|
|
||||||
- ):
|
|
||||||
- continue
|
|
||||||
+ connections = utility.list_collections(using=f"{self.uri}-{self.user}")
|
|
||||||
+ for connection in connections:
|
|
||||||
resources.append(
|
|
||||||
Resource(
|
|
||||||
- uri=meta.get(self.url_field, "")
|
|
||||||
- or f"milvus://{meta.get(self.id_field, '')}",
|
|
||||||
- title=meta.get(self.title_field, "")
|
|
||||||
- or meta.get(self.id_field, "Unnamed"),
|
|
||||||
+ uri=f"milvus://{connection}",
|
|
||||||
+ title=connection,
|
|
||||||
description="Stored Milvus document",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
@@ -621,38 +637,32 @@ class MilvusRetriever(Retriever):
|
|
||||||
|
|
||||||
else:
|
|
||||||
# For LangChain Milvus, use similarity search
|
|
||||||
- search_results = self.client.similarity_search_with_score(
|
|
||||||
- query=query, k=self.top_k
|
|
||||||
- )
|
|
||||||
+ if not resources:
|
|
||||||
+ return []
|
|
||||||
|
|
||||||
documents = {}
|
|
||||||
+ for resource in resources:
|
|
||||||
+ self._connect_with_collection(resource.title)
|
|
||||||
+ search_results = self.client.similarity_search_with_score(
|
|
||||||
+ query=query, k=self.top_k
|
|
||||||
+ )
|
|
||||||
|
|
||||||
- for doc, score in search_results:
|
|
||||||
- metadata = doc.metadata or {}
|
|
||||||
- doc_id = metadata.get(self.id_field, "")
|
|
||||||
- title = metadata.get(self.title_field, "")
|
|
||||||
- url = metadata.get(self.url_field, "")
|
|
||||||
- content = doc.page_content
|
|
||||||
-
|
|
||||||
- # Skip if resource filtering is requested and this doc is not in the list
|
|
||||||
- if resources:
|
|
||||||
- doc_in_resources = False
|
|
||||||
- for resource in resources:
|
|
||||||
- if (url and url in resource.uri) or doc_id in resource.uri:
|
|
||||||
- doc_in_resources = True
|
|
||||||
- break
|
|
||||||
- if not doc_in_resources:
|
|
||||||
- continue
|
|
||||||
-
|
|
||||||
- # Create or update document
|
|
||||||
- if doc_id not in documents:
|
|
||||||
- documents[doc_id] = Document(
|
|
||||||
- id=doc_id, url=url, title=title, chunks=[]
|
|
||||||
- )
|
|
||||||
+ for doc, score in search_results:
|
|
||||||
+ metadata = doc.metadata or {}
|
|
||||||
+ doc_id = metadata.get(self.id_field, "")
|
|
||||||
+ title = metadata.get(self.title_field, "")
|
|
||||||
+ url = metadata.get(self.url_field, "")
|
|
||||||
+ content = doc.page_content
|
|
||||||
+
|
|
||||||
+ # Create or update document
|
|
||||||
+ if doc_id not in documents:
|
|
||||||
+ documents[doc_id] = Document(
|
|
||||||
+ id=doc_id, url=url, title=title, chunks=[]
|
|
||||||
+ )
|
|
||||||
|
|
||||||
- # Add chunk to document
|
|
||||||
- chunk = Chunk(content=content, similarity=score)
|
|
||||||
- documents[doc_id].chunks.append(chunk)
|
|
||||||
+ # Add chunk to document
|
|
||||||
+ chunk = Chunk(content=content, similarity=score)
|
|
||||||
+ documents[doc_id].chunks.append(chunk)
|
|
||||||
|
|
||||||
return list(documents.values())
|
|
||||||
|
|
||||||
diff --git a/web/src/components/deer-flow/theme-provider-wrapper.tsx b/web/src/components/deer-flow/theme-provider-wrapper.tsx
|
|
||||||
index 6da0db8..1a99bcf 100644
|
|
||||||
--- a/web/src/components/deer-flow/theme-provider-wrapper.tsx
|
|
||||||
+++ b/web/src/components/deer-flow/theme-provider-wrapper.tsx
|
|
||||||
@@ -18,9 +18,9 @@ export function ThemeProviderWrapper({
|
|
||||||
return (
|
|
||||||
<ThemeProvider
|
|
||||||
attribute="class"
|
|
||||||
- defaultTheme={"dark"}
|
|
||||||
+ defaultTheme={"light"}
|
|
||||||
enableSystem={isChatPage}
|
|
||||||
- forcedTheme={isChatPage ? undefined : "dark"}
|
|
||||||
+ forcedTheme={isChatPage ? undefined : "light"}
|
|
||||||
disableTransitionOnChange
|
|
||||||
>
|
|
||||||
{children}
|
|
||||||
diff --git a/web/src/core/api/resolve-service-url.ts b/web/src/core/api/resolve-service-url.ts
|
|
||||||
index a87b777..d93e987 100644
|
|
||||||
--- a/web/src/core/api/resolve-service-url.ts
|
|
||||||
+++ b/web/src/core/api/resolve-service-url.ts
|
|
||||||
@@ -4,9 +4,13 @@
|
|
||||||
import { env } from "~/env";
|
|
||||||
|
|
||||||
export function resolveServiceURL(path: string) {
|
|
||||||
- let BASE_URL = env.NEXT_PUBLIC_API_URL ?? "http://localhost:8000/api/";
|
|
||||||
+ let BASE_URL = env.NEXT_PUBLIC_API_URL ?? "/api/";
|
|
||||||
if (!BASE_URL.endsWith("/")) {
|
|
||||||
BASE_URL += "/";
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+ const origin = window.location.origin;
|
|
||||||
+ BASE_URL = origin + BASE_URL;
|
|
||||||
+
|
|
||||||
return new URL(path, BASE_URL).toString();
|
|
||||||
}
|
|
||||||
6
runtime/ops/examples/test_operator/__init__.py
Normal file
6
runtime/ops/examples/test_operator/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from datamate.core.base_op import OPERATORS
|
||||||
|
|
||||||
|
OPERATORS.register_module(module_name='TestMapper',
|
||||||
|
module_path="ops.user.test_operator.process")
|
||||||
85
runtime/ops/examples/test_operator/metadata.yml
Normal file
85
runtime/ops/examples/test_operator/metadata.yml
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
name: '测试算子'
|
||||||
|
description: '这是一个测试算子。'
|
||||||
|
language: 'python'
|
||||||
|
vendor: 'huawei'
|
||||||
|
raw_id: 'TestMapper'
|
||||||
|
version: '1.0.0'
|
||||||
|
modal: 'text'
|
||||||
|
effect:
|
||||||
|
before: '使用方式很简单,只需要将代码放入Markdown文本中即可,富文本格式可直接复制表情😀使用。'
|
||||||
|
after: '使用方式很简单,只需要将代码放入Markdown文本中即可,富文本格式可直接复制表情使用。'
|
||||||
|
inputs: 'text'
|
||||||
|
outputs: 'text'
|
||||||
|
settings:
|
||||||
|
sliderTest:
|
||||||
|
name: '滑窗测试'
|
||||||
|
description: '这是一个测试滑窗。'
|
||||||
|
type: 'slider'
|
||||||
|
defaultVal: 0.5
|
||||||
|
min: 0
|
||||||
|
max: 1
|
||||||
|
step: 0.1
|
||||||
|
switchTest:
|
||||||
|
name: '开关测试'
|
||||||
|
description: '这是一个开关测试。'
|
||||||
|
type: 'switch'
|
||||||
|
defaultVal: 'true'
|
||||||
|
required: false
|
||||||
|
checkedLabel: '选中'
|
||||||
|
unCheckedLabel: '未选中'
|
||||||
|
radioTest:
|
||||||
|
name: '单选测试'
|
||||||
|
description: '这是一个单选测试。'
|
||||||
|
type: 'radio'
|
||||||
|
defaultVal: 'option1'
|
||||||
|
required: false
|
||||||
|
options:
|
||||||
|
- label: '选项1'
|
||||||
|
value: 'option1'
|
||||||
|
- label: '选项2'
|
||||||
|
value: 'option2'
|
||||||
|
selectTest:
|
||||||
|
name: '下拉框测试'
|
||||||
|
description: '这是一个下拉框测试。'
|
||||||
|
type: 'select'
|
||||||
|
defaultVal: 'option1'
|
||||||
|
required: false
|
||||||
|
options:
|
||||||
|
- label: '选项1'
|
||||||
|
value: 'option1'
|
||||||
|
- label: '选项2'
|
||||||
|
value: 'option2'
|
||||||
|
rangeTest:
|
||||||
|
name: '范围测试'
|
||||||
|
description: '这是一个范围框测试。'
|
||||||
|
type: 'range'
|
||||||
|
properties:
|
||||||
|
- name: 'rangeLeft'
|
||||||
|
type: 'inputNumber'
|
||||||
|
defaultVal: 100
|
||||||
|
min: 0
|
||||||
|
max: 10000
|
||||||
|
step: 1
|
||||||
|
- name: 'rangeRight'
|
||||||
|
type: 'inputNumber'
|
||||||
|
defaultVal: 8000
|
||||||
|
min: 0
|
||||||
|
max: 10000
|
||||||
|
step: 1
|
||||||
|
checkboxTest:
|
||||||
|
name: '多选框测试'
|
||||||
|
description: '这是一个多选框测试。'
|
||||||
|
type: 'checkbox'
|
||||||
|
defaultVal: 'option1,option2'
|
||||||
|
required: false
|
||||||
|
options:
|
||||||
|
- label: '选项1'
|
||||||
|
value: 'option1'
|
||||||
|
- label: '选项2'
|
||||||
|
value: 'option2'
|
||||||
|
inputTest:
|
||||||
|
name: '输入框测试'
|
||||||
|
description: '这是一个输入框测试。'
|
||||||
|
type: 'input'
|
||||||
|
defaultVal: 'Test Input'
|
||||||
|
required: false
|
||||||
10
runtime/ops/examples/test_operator/process.py
Normal file
10
runtime/ops/examples/test_operator/process.py
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
|
||||||
|
from typing import Dict, Any
|
||||||
|
|
||||||
|
from datamate.core.base_op import Mapper
|
||||||
|
|
||||||
|
|
||||||
|
class TestMapper(Mapper):
|
||||||
|
def execute(self, sample: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
sample[self.text_key] += "\n####################\n"
|
||||||
|
return sample
|
||||||
@@ -1,49 +0,0 @@
|
|||||||
{
|
|
||||||
"name": "text_length_filter",
|
|
||||||
"displayName": "文本长度过滤器",
|
|
||||||
"version": "1.0.0",
|
|
||||||
"author": "DataMate Team",
|
|
||||||
"description": "根据文本长度过滤数据,支持最小和最大长度限制",
|
|
||||||
"category": "数据清洗",
|
|
||||||
"type": "CUSTOM",
|
|
||||||
"inputs": [
|
|
||||||
{
|
|
||||||
"name": "input_data",
|
|
||||||
"type": "array",
|
|
||||||
"description": "输入文本数组",
|
|
||||||
"required": true
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "filtered_data",
|
|
||||||
"type": "array",
|
|
||||||
"description": "过滤后的文本数组"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"parameters": [
|
|
||||||
{
|
|
||||||
"name": "min_length",
|
|
||||||
"type": "integer",
|
|
||||||
"description": "最小文本长度",
|
|
||||||
"default": 10,
|
|
||||||
"min": 0
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "max_length",
|
|
||||||
"type": "integer",
|
|
||||||
"description": "最大文本长度",
|
|
||||||
"default": 1000,
|
|
||||||
"min": 1
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "text_field",
|
|
||||||
"type": "string",
|
|
||||||
"description": "文本字段名称(如果输入是对象数组)",
|
|
||||||
"default": "text"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"tags": ["文本处理", "数据过滤", "长度检查"],
|
|
||||||
"documentation": "https://docs.datamate.com/operators/text-length-filter",
|
|
||||||
"repository": "https://github.com/datamate/operators/tree/main/text-length-filter"
|
|
||||||
}
|
|
||||||
@@ -1,135 +0,0 @@
|
|||||||
"""
|
|
||||||
文本长度过滤器算子
|
|
||||||
根据设定的最小和最大长度过滤文本数据
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
from typing import Dict, Any, List, Union
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
class TextLengthFilter:
|
|
||||||
"""文本长度过滤器算子"""
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.name = "text_length_filter"
|
|
||||||
self.version = "1.0.0"
|
|
||||||
|
|
||||||
def execute(self, config: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
|
|
||||||
"""执行文本长度过滤"""
|
|
||||||
|
|
||||||
logger.info(f"开始执行算子: {self.name}")
|
|
||||||
|
|
||||||
# 获取参数
|
|
||||||
parameters = config.get('parameters', {})
|
|
||||||
min_length = parameters.get('min_length', 10)
|
|
||||||
max_length = parameters.get('max_length', 1000)
|
|
||||||
text_field = parameters.get('text_field', 'text')
|
|
||||||
|
|
||||||
logger.info(f"过滤参数: min_length={min_length}, max_length={max_length}, text_field={text_field}")
|
|
||||||
|
|
||||||
# 验证参数
|
|
||||||
if min_length < 0:
|
|
||||||
raise ValueError("min_length must be >= 0")
|
|
||||||
if max_length < min_length:
|
|
||||||
raise ValueError("max_length must be >= min_length")
|
|
||||||
|
|
||||||
# 读取输入数据
|
|
||||||
input_path = context['input_path']
|
|
||||||
with open(input_path, 'r', encoding='utf-8') as f:
|
|
||||||
input_data = json.load(f)
|
|
||||||
|
|
||||||
if not isinstance(input_data, list):
|
|
||||||
raise ValueError("输入数据必须是数组格式")
|
|
||||||
|
|
||||||
logger.info(f"输入数据条数: {len(input_data)}")
|
|
||||||
|
|
||||||
# 执行过滤
|
|
||||||
filtered_data = []
|
|
||||||
stats = {
|
|
||||||
'total_input': len(input_data),
|
|
||||||
'too_short': 0,
|
|
||||||
'too_long': 0,
|
|
||||||
'filtered_out': 0,
|
|
||||||
'kept': 0
|
|
||||||
}
|
|
||||||
|
|
||||||
for i, item in enumerate(input_data):
|
|
||||||
try:
|
|
||||||
# 提取文本内容
|
|
||||||
if isinstance(item, str):
|
|
||||||
text = item
|
|
||||||
elif isinstance(item, dict) and text_field in item:
|
|
||||||
text = str(item[text_field])
|
|
||||||
else:
|
|
||||||
logger.warning(f"跳过无法处理的数据项 {i}: {type(item)}")
|
|
||||||
stats['filtered_out'] += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
# 检查长度
|
|
||||||
text_length = len(text)
|
|
||||||
|
|
||||||
if text_length < min_length:
|
|
||||||
stats['too_short'] += 1
|
|
||||||
stats['filtered_out'] += 1
|
|
||||||
elif text_length > max_length:
|
|
||||||
stats['too_long'] += 1
|
|
||||||
stats['filtered_out'] += 1
|
|
||||||
else:
|
|
||||||
filtered_data.append(item)
|
|
||||||
stats['kept'] += 1
|
|
||||||
|
|
||||||
# 进度报告
|
|
||||||
if (i + 1) % 1000 == 0:
|
|
||||||
progress = (i + 1) / len(input_data) * 100
|
|
||||||
logger.info(f"处理进度: {progress:.1f}% ({i + 1}/{len(input_data)})")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"处理数据项 {i} 时出错: {e}")
|
|
||||||
stats['filtered_out'] += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
# 保存结果
|
|
||||||
output_path = context['output_path']
|
|
||||||
with open(output_path, 'w', encoding='utf-8') as f:
|
|
||||||
json.dump(filtered_data, f, ensure_ascii=False, indent=2)
|
|
||||||
|
|
||||||
# 准备返回结果
|
|
||||||
result = {
|
|
||||||
'status': 'success',
|
|
||||||
'statistics': stats,
|
|
||||||
'filter_rate': stats['filtered_out'] / stats['total_input'] * 100 if stats['total_input'] > 0 else 0,
|
|
||||||
'output_path': output_path
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info(f"过滤完成: {stats}")
|
|
||||||
logger.info(f"过滤率: {result['filter_rate']:.2f}%")
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
def validate_config(self, config: Dict[str, Any]) -> List[str]:
|
|
||||||
"""验证配置参数"""
|
|
||||||
errors = []
|
|
||||||
parameters = config.get('parameters', {})
|
|
||||||
|
|
||||||
min_length = parameters.get('min_length')
|
|
||||||
max_length = parameters.get('max_length')
|
|
||||||
|
|
||||||
if min_length is not None and not isinstance(min_length, int):
|
|
||||||
errors.append("min_length must be an integer")
|
|
||||||
|
|
||||||
if max_length is not None and not isinstance(max_length, int):
|
|
||||||
errors.append("max_length must be an integer")
|
|
||||||
|
|
||||||
if min_length is not None and min_length < 0:
|
|
||||||
errors.append("min_length must be >= 0")
|
|
||||||
|
|
||||||
if min_length is not None and max_length is not None and max_length < min_length:
|
|
||||||
errors.append("max_length must be >= min_length")
|
|
||||||
|
|
||||||
return errors
|
|
||||||
|
|
||||||
def create_operator():
|
|
||||||
"""算子工厂函数"""
|
|
||||||
return TextLengthFilter()
|
|
||||||
@@ -9,6 +9,7 @@ CREATE TABLE IF NOT EXISTS t_model_config
|
|||||||
api_key VARCHAR(512) DEFAULT '' COMMENT 'API 密钥(无密钥则为空)',
|
api_key VARCHAR(512) DEFAULT '' COMMENT 'API 密钥(无密钥则为空)',
|
||||||
type VARCHAR(50) NOT NULL COMMENT '模型类型(如 chat、embedding)',
|
type VARCHAR(50) NOT NULL COMMENT '模型类型(如 chat、embedding)',
|
||||||
is_enabled TINYINT DEFAULT 1 COMMENT '是否启用:1-启用,0-禁用',
|
is_enabled TINYINT DEFAULT 1 COMMENT '是否启用:1-启用,0-禁用',
|
||||||
|
is_default TINYINT DEFAULT 0 COMMENT '是否默认:1-默认,0-非默认',
|
||||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
|
||||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
|
||||||
created_by VARCHAR(255) COMMENT '创建者',
|
created_by VARCHAR(255) COMMENT '创建者',
|
||||||
|
|||||||
@@ -11,13 +11,10 @@ RUN apt-get update && apt-get install -y \
|
|||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Copy the application into the container.
|
# Copy the application into the container.
|
||||||
RUN git clone https://github.com/bytedance/deer-flow.git /app
|
RUN git clone https://github.com/ModelEngine-Group/deer-flow.git /app
|
||||||
COPY runtime/deer-flow/feature_collection.patch /app/feature_collection.patch
|
|
||||||
COPY runtime/deer-flow/.env /app/.env
|
COPY runtime/deer-flow/.env /app/.env
|
||||||
COPY runtime/deer-flow/conf.yaml /app/conf.yaml
|
COPY runtime/deer-flow/conf.yaml /app/conf.yaml
|
||||||
|
|
||||||
RUN git apply feature_collection.patch
|
|
||||||
|
|
||||||
# Pre-cache the application dependencies.
|
# Pre-cache the application dependencies.
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
uv sync --locked --no-install-project
|
uv sync --locked --no-install-project
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ FROM node:20-alpine AS deps
|
|||||||
RUN apk add --no-cache libc6-compat openssl git
|
RUN apk add --no-cache libc6-compat openssl git
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
RUN git clone https://github.com/bytedance/deer-flow.git /deer-flow \
|
RUN git clone https://github.com/ModelEngine-Group/deer-flow.git /deer-flow \
|
||||||
&& mv /deer-flow/web/* /app \
|
&& mv /deer-flow/web/* /app \
|
||||||
&& rm -rf /deer-flow
|
&& rm -rf /deer-flow
|
||||||
# Install dependencies based on the preferred package manager
|
# Install dependencies based on the preferred package manager
|
||||||
@@ -24,12 +24,9 @@ RUN apk add --no-cache git
|
|||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
ARG NEXT_PUBLIC_API_URL="/deer-flow-backend"
|
ARG NEXT_PUBLIC_API_URL="/deer-flow-backend"
|
||||||
|
|
||||||
RUN git clone https://github.com/bytedance/deer-flow.git /deer-flow
|
RUN git clone https://github.com/ModelEngine-Group/deer-flow.git /deer-flow
|
||||||
|
|
||||||
COPY runtime/deer-flow/feature_collection.patch /deer-flow/feature_collection.patch
|
|
||||||
|
|
||||||
RUN cd /deer-flow \
|
RUN cd /deer-flow \
|
||||||
&& git apply feature_collection.patch \
|
|
||||||
&& mv /deer-flow/web/* /app \
|
&& mv /deer-flow/web/* /app \
|
||||||
&& rm -rf /deer-flow
|
&& rm -rf /deer-flow
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user