init datamate

This commit is contained in:
Dallas98
2025-10-21 23:00:48 +08:00
commit 1c97afed7d
692 changed files with 135442 additions and 0 deletions

View File

@@ -0,0 +1,23 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
from typing import List
from loguru import logger
from datamate.common.utils.text_splitter import TextSplitter
class TextSegmentationOperator:
    """Split raw text into chunks by delegating to a ``TextSplitter``.

    The operator builds one splitter at construction time and reuses it for
    every ``process`` call.
    """

    def __init__(self, chunk_size, chunk_overlap):
        """Create the underlying text splitter.

        Args:
            chunk_size: Forwarded unchanged to ``TextSplitter`` as its second
                positional argument.
            chunk_overlap: Forwarded unchanged to ``TextSplitter`` as its
                third positional argument.

        Raises:
            Exception: Whatever ``TextSplitter`` raises during construction;
                the error is logged and then re-raised unchanged.
        """
        try:
            # NOTE(review): the meaning of the leading -1 is defined by
            # TextSplitter and is not visible from this file -- confirm
            # against its signature before changing it.
            self.text_splitter = TextSplitter(-1, chunk_size, chunk_overlap)
        except Exception as err:
            logger.exception(f"init text splitter failed, error is: {err}")
            # Bare ``raise`` re-raises the active exception as-is.
            raise

    def process(self, input_data: str) -> List[str]:
        """Split ``input_data`` into chunks.

        Args:
            input_data: Text to segment.

        Returns:
            An empty list when the text is empty or whitespace-only;
            otherwise the result of ``self.text_splitter.split_text`` called
            with the original (unstripped) text.
        """
        # Guard clause: blank input never reaches the splitter.
        if not input_data.strip():
            logger.info("input text is empty, return empty chunks.")
            return []
        return self.text_splitter.split_text(input_data)