You've already forked DataMate
init datamate
This commit is contained in:
22
runtime/ops/slicer/__init__.py
Normal file
22
runtime/ops/slicer/__init__.py
Normal file
@@ -0,0 +1,22 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from datamate.common.utils.custom_importer import CustomImporter
|
||||
|
||||
|
||||
def _configure_importer():
    """Append a ``CustomImporter`` rooted at this package's directory to ``sys.meta_path``."""
    package_dir = Path(__file__).resolve().parent
    importer = CustomImporter(package_dir)
    sys.meta_path.append(importer)


# Runs once, when the package is first imported.
_configure_importer()
|
||||
|
||||
|
||||
def _import_operators():
    """Import each operator sub-package, in a fixed order, for its import-time side effects."""
    from . import (
        slide_simple_slicer,
        slide_annotation_slicer,
        segmentation,
    )


# Runs once, when the package is first imported.
_import_operators()
|
||||
6
runtime/ops/slicer/segmentation/__init__.py
Normal file
6
runtime/ops/slicer/segmentation/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from datamate.core.base_op import OPERATORS
|
||||
|
||||
# Register the 'Segmentation' operator together with the dotted path of its implementation module.
OPERATORS.register_module(
    module_name="Segmentation",
    module_path="ops.slicer.segmentation.process",
)
|
||||
16
runtime/ops/slicer/segmentation/metadata.yml
Normal file
16
runtime/ops/slicer/segmentation/metadata.yml
Normal file
@@ -0,0 +1,16 @@
|
||||
name: '文本切分'
|
||||
name_en: 'Text Segmentation'
|
||||
description: '将文本切分成多个切片。'
|
||||
description_en: 'Split text into multiple chunks.'
|
||||
language: 'python'
|
||||
vendor: 'huawei'
|
||||
raw_id: 'Segmentation'
|
||||
version: '1.0.0'
|
||||
types:
|
||||
- 'consolidate'
|
||||
modal: 'text'
|
||||
effect:
|
||||
before: ''
|
||||
after: ''
|
||||
inputs: 'text'
|
||||
outputs: 'text'
|
||||
62
runtime/ops/slicer/segmentation/process.py
Normal file
62
runtime/ops/slicer/segmentation/process.py
Normal file
@@ -0,0 +1,62 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Description: 文本切分算子
|
||||
Create: 2023/11/09 10:17
|
||||
"""
|
||||
|
||||
import random
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from datamate.common.utils.text_splitter import TextSplitter
|
||||
from datamate.core.base_op import Slicer
|
||||
|
||||
|
||||
class TextSegmentationOperator:
    """Owns a ``TextSplitter`` and applies it to raw text."""

    def __init__(self, max_characters, chunk_size, chunk_overlap):
        """Build the underlying splitter from the three sizing parameters.

        Raises:
            Exception: ``(83001, "init text splitter failed")`` when the
                splitter cannot be constructed. The original error is
                logged and deliberately not chained.
        """
        try:
            splitter = TextSplitter(max_characters, chunk_size, chunk_overlap)
        except Exception as err:
            logger.exception(f"init text splitter failed, error is: {err}")
            raise Exception(83001, "init text splitter failed") from None
        self.text_splitter = splitter

    def process(self, input_data: str) -> List[str]:
        """Split ``input_data`` into chunks.

        Returns:
            The splitter's output, or an empty list when the input is empty
            or contains only whitespace.
        """
        if input_data.strip() != "":
            chunks = self.text_splitter.split_text(input_data)
            return chunks

        logger.info("input text is empty, return empty chunks.")
        return []
|
||||
|
||||
|
||||
class Segmentation(Slicer):
    """Slicing operator plugin: splits a sample's text and saves a random subset of the chunks.

    Keyword Args:
        maxCharacters: Passed to the text splitter (default ``-1``).
        chunkSize: Passed to the text splitter (default ``800``).
        chunkOverlap: Passed to the text splitter (default ``100``).
        sliceNum: Maximum number of chunks saved per sample (default ``5``).
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.max_characters = kwargs.get("maxCharacters", -1)
        self.chunk_size = kwargs.get("chunkSize", 800)
        self.chunk_overlap = kwargs.get("chunkOverlap", 100)
        self.slice_num = kwargs.get("sliceNum", 5)
        self.op = TextSegmentationOperator(self.max_characters, self.chunk_size, self.chunk_overlap)
        # NOTE(review): presumably marks this operator as the final pipeline stage — confirm against Slicer.
        self.last_ops = True

    def execute(self, sample: Dict[str, Any]) -> List[Dict]:
        """Split the sample's text, save up to ``sliceNum`` randomly chosen chunks, and return the sample.

        Each saved chunk is handed to ``save_patch_sample`` with its position in
        the sampled list as ``sequenceId`` and its character count as ``chunkSize``.
        On return, ``sample["fileNum"]`` holds the total number of chunks and the
        sample's text is replaced by ``"Success"``.

        Raises:
            Exception: ``(83002, "split text failed")`` when splitting fails. The
                original error is logged and deliberately not chained.
        """
        try:
            chunks = self.op.process(sample[self.text_key])
        except Exception as err:
            logger.exception(f"split text failed, error is: {err}")
            # Code 83002 is the split failure; its message must not repeat the 83001 init-failure text.
            raise Exception(83002, "split text failed") from None

        num_to_sample = min(self.slice_num, len(chunks))
        # random.sample returns the chosen chunks themselves, in random order.
        sampled_chunks = random.sample(chunks, num_to_sample)
        for idx, chunk in enumerate(sampled_chunks):
            temp_sample = {
                self.text_key: chunk,
                self.data_key: "",
                self.export_path_key: sample[self.export_path_key],
                self.filename_key: sample[self.filename_key],
                self.fileid_key: sample[self.fileid_key],
                "sequenceId": str(idx),
                "chunkSize": str(len(chunk)),
            }
            self.save_patch_sample(temp_sample, idx, save_format="text")

        # NOTE(review): this is the number of chunks produced, not the number saved
        # (at most sliceNum) — confirm which one consumers of fileNum expect.
        sample["fileNum"] = len(chunks)
        sample[self.text_key] = "Success"

        return [sample]
|
||||
6
runtime/ops/slicer/slide_annotation_slicer/__init__.py
Normal file
6
runtime/ops/slicer/slide_annotation_slicer/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from datamate.core.base_op import OPERATORS
|
||||
|
||||
# Register the 'AnnotationSlicer' operator together with the dotted path of its implementation module.
OPERATORS.register_module(
    module_name="AnnotationSlicer",
    module_path="ops.slicer.slide_annotation_slicer.process",
)
|
||||
16
runtime/ops/slicer/slide_annotation_slicer/metadata.yml
Normal file
16
runtime/ops/slicer/slide_annotation_slicer/metadata.yml
Normal file
@@ -0,0 +1,16 @@
|
||||
name: '病理图片标注切片'
|
||||
name_en: 'Pathological Image Annotation Slicing'
|
||||
description: '根据标注文件对病理图片进行切片。'
|
||||
description_en: 'Slicing pathology images based on an annotation file.'
|
||||
language: 'python'
|
||||
vendor: 'huawei'
|
||||
raw_id: 'AnnotationSlicer'
|
||||
version: '1.0.0'
|
||||
types:
|
||||
- 'consolidate'
|
||||
modal: 'image'
|
||||
effect:
|
||||
before: ''
|
||||
after: ''
|
||||
inputs: 'image'
|
||||
outputs: 'image'
|
||||
117
runtime/ops/slicer/slide_annotation_slicer/process.py
Normal file
117
runtime/ops/slicer/slide_annotation_slicer/process.py
Normal file
@@ -0,0 +1,117 @@
|
||||
# -- encoding: utf-8 --
|
||||
|
||||
"""
|
||||
Description: 医疗图片按坐标切片
|
||||
Create: 2025/02/08 11:00
|
||||
"""
|
||||
import copy
|
||||
import time
|
||||
import os
|
||||
from typing import List, Dict, Any
|
||||
|
||||
import xml.etree.ElementTree as ET
|
||||
from loguru import logger
|
||||
|
||||
import numpy as np
|
||||
import cv2
|
||||
from openslide import OpenSlide
|
||||
|
||||
from datamate.core.base_op import Slicer
|
||||
from datamate.common.utils import bytes_transform
|
||||
|
||||
|
||||
class AnnotationSlicer(Slicer):
    """Cut one image patch per XML annotation out of a whole-slide image."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # NOTE(review): presumably marks this operator as the final pipeline stage — confirm against Slicer.
        self.last_ops = True

    def execute(self, sample: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Slice the slide referenced by ``sample`` according to its annotation file.

        Reads the slide from ``sample[self.filepath_key]`` and the annotation XML
        from ``sample["extraFilePath"]``, saves one patch per annotation, and
        stores the number of patches in ``sample["slice_num"]``.

        Returns:
            A single-element list containing the updated ``sample``.
        """
        start = time.time()

        slide = OpenSlide(sample[self.filepath_key])
        try:
            annotations = self.parse_xml_annotations(sample["extraFilePath"])
            patch_num = self.auto_coordinate_slicer(sample, slide, annotations)
        finally:
            # Release the native slide handle even when parsing or slicing fails.
            slide.close()
        sample["slice_num"] = patch_num

        file_name = sample[self.filename_key]
        logger.info(f"fileName: {file_name}, method: CoordinateSlider costs {(time.time() - start):6f} s")

        return [sample]

    def parse_xml_annotations(self, xml_path: str) -> List:
        """Parse the annotation XML and collect each annotation's group and polygon.

        Args:
            xml_path: Path of the XML file; its root must contain an
                ``<Annotations>`` element.

        Returns:
            One dict per ``<Annotation>`` that has at least one coordinate, with
            ``'part_of_group'`` (the ``PartOfGroup`` attribute) and
            ``'coordinates'`` (an ``(N, 2)`` int32 array of ``(x, y)`` points).

        Raises:
            ValueError: If the root has no ``<Annotations>`` element.
        """
        root = ET.parse(xml_path).getroot()

        # Locate the <Annotations> container.
        annotations_tag = root.find('Annotations')
        if annotations_tag is None:
            raise ValueError("未找到 Annotations 标签")

        annotations = []
        # Walk every <Annotation> element.
        for annotation in annotations_tag.findall('Annotation'):
            part_of_group = annotation.get('PartOfGroup')
            coordinates_tag = annotation.find('Coordinates')
            if coordinates_tag is None:
                points = []
            else:
                points = [
                    (float(coord.get('X')), float(coord.get('Y')))
                    for coord in coordinates_tag.findall('Coordinate')
                ]
            if not points:
                # Nothing to cut out: an empty polygon cannot be clipped or bounded downstream.
                logger.warning(f"annotation in group {part_of_group} has no coordinates, skipped.")
                continue
            annotations.append({
                'part_of_group': part_of_group,
                # Fractional coordinates are truncated to whole pixels.
                'coordinates': np.array(points, dtype=np.int32)
            })

        return annotations

    def auto_coordinate_slicer(
            self,
            original_sample: Dict,
            slide: OpenSlide,
            annotations: List
    ) -> int:
        """Save the bounding-box patch of every annotation polygon.

        Args:
            original_sample: Sample that is deep-copied for each patch.
            slide: Open slide to read regions from (level 0).
            annotations: Output of ``parse_xml_annotations``.

        Returns:
            The number of patches saved.
        """
        wsi_width, wsi_height = slide.dimensions

        patch_no = 0
        for annotation in annotations:
            # Clamp x to [0, width] and y to [0, height] so the polygon stays on the slide.
            coordinates = annotation['coordinates'].clip(min=0, max=(wsi_width, wsi_height))

            # The patch is the polygon's axis-aligned bounding box; pixels outside
            # the polygon but inside the box are kept as they are.
            x, y, w, h = cv2.boundingRect(coordinates)

            # Read the region from the slide and drop the alpha channel.
            region = slide.read_region((x, y), 0, (w, h))
            region_np = np.array(region.convert("RGB"))

            patch_sample = copy.deepcopy(original_sample)
            patch_sample[self.data_key] = bytes_transform.numpy_to_bytes(region_np, '.png')
            patch_no += 1
            self.save_patch_sample(patch_sample, patch_no, save_format="image")

        logger.info(f">>> {patch_no} annotations found and sliced.")

        return patch_no
|
||||
6
runtime/ops/slicer/slide_simple_slicer/__init__.py
Normal file
6
runtime/ops/slicer/slide_simple_slicer/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from datamate.core.base_op import OPERATORS
|
||||
|
||||
# Register the 'SimpleSlicer' operator together with the dotted path of its implementation module.
OPERATORS.register_module(
    module_name="SimpleSlicer",
    module_path="ops.slicer.slide_simple_slicer.process",
)
|
||||
43
runtime/ops/slicer/slide_simple_slicer/metadata.yml
Normal file
43
runtime/ops/slicer/slide_simple_slicer/metadata.yml
Normal file
@@ -0,0 +1,43 @@
|
||||
name: '病理图片自动切片'
|
||||
name_en: 'Pathological Image Automatic Slicing'
|
||||
description: '按照给定规格对病理图片进行切片。'
|
||||
description_en: 'Slicing pathology image with given box size.'
|
||||
language: 'python'
|
||||
vendor: 'huawei'
|
||||
raw_id: 'SimpleSlicer'
|
||||
version: '1.0.0'
|
||||
types:
|
||||
- 'consolidate'
|
||||
modal: 'image'
|
||||
effect:
|
||||
before: ''
|
||||
after: ''
|
||||
inputs: 'image'
|
||||
outputs: 'image'
|
||||
settings:
|
||||
sliceSize:
|
||||
name: 切片尺寸
|
||||
type: multiple
|
||||
properties:
|
||||
- type: inputNumber
|
||||
name: 宽度
|
||||
description: 像素
|
||||
defaultVal: 128
|
||||
min: 100
|
||||
max: 1024
|
||||
step: 1
|
||||
- type: inputNumber
|
||||
name: 高度
|
||||
description: 像素
|
||||
defaultVal: 128
|
||||
min: 100
|
||||
max: 1024
|
||||
step: 1
|
||||
overlap:
|
||||
name: 重叠比例
|
||||
description: 重叠比例越大,切片间重叠面积越大。
|
||||
type: slider
|
||||
defaultVal: 0
|
||||
min: 0
|
||||
max: 0.9
|
||||
step: 0.1
|
||||
98
runtime/ops/slicer/slide_simple_slicer/process.py
Normal file
98
runtime/ops/slicer/slide_simple_slicer/process.py
Normal file
@@ -0,0 +1,98 @@
|
||||
# -- encoding: utf-8 --
|
||||
|
||||
"""
|
||||
Description: Slice pathology slide images into fixed-size tiles
|
||||
Create: 2025/02/08 11:00
|
||||
"""
|
||||
import copy
|
||||
import time
|
||||
from typing import List, Tuple, Dict, Any
|
||||
|
||||
import itertools
|
||||
from loguru import logger
|
||||
|
||||
from openslide import OpenSlide
|
||||
import numpy as np
|
||||
|
||||
from datamate.core.base_op import Slicer
|
||||
|
||||
from datamate.common.utils import bytes_transform
|
||||
|
||||
|
||||
class SimpleSlicer(Slicer):
    """Tile a whole-slide image into fixed-size patches on a regular grid.

    Keyword Args:
        sliceSize: ``[width, height]`` of each patch in pixels, two positive
            integers (default ``[128, 128]``).
        overlap: Fraction of a patch shared with its neighbour, in ``[0, 1)``
            (default ``0``).
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self._target_size = kwargs.get("sliceSize", [128, 128])
        self._overlap = kwargs.get("overlap", 0)
        # NOTE(review): presumably marks this operator as the final pipeline stage — confirm against Slicer.
        self.last_ops = True

        if not isinstance(self._target_size, list):
            raise TypeError(f"<targetSize> received as {type(self._target_size)}, but expected list.")
        if len(self._target_size) != 2:
            raise ValueError(f"<targetSize> has {len(self._target_size)} elements, but expected 2.")
        if not all(isinstance(dim, int) for dim in self._target_size):
            raise TypeError(f"Elements in <targetSize> must be integers, but got {self._target_size}.")
        # A zero size gives a zero stride and a negative size gives an empty grid.
        if not all(dim > 0 for dim in self._target_size):
            raise ValueError(f"Elements in <targetSize> must be positive, but got {self._target_size}.")
        if not isinstance(self._overlap, (int, float)):
            raise TypeError(f"<overlap> received as {type(self._overlap)}, but expected int or float.")
        # overlap == 1 means the window never advances, so it is rejected up front
        # instead of failing later inside range(); NaN is rejected by the same test.
        if not 0 <= self._overlap < 1:
            raise ValueError(
                f"<overlap> received an out of range value: {self._overlap}, "
                f"but (0 <= overlap < 1) is expected."
            )

    def execute(self, sample: Dict[str, Any]) -> List[Dict]:
        """Slice the slide at ``sample["filePath"]`` and record the patch count.

        Stores the number of saved patches in ``sample["slice_num"]``.

        Returns:
            A single-element list containing the updated ``sample``.
        """
        start = time.time()

        slide = OpenSlide(sample["filePath"])
        try:
            patch_num = self.auto_simple_slicer(
                sample, slide, slide.dimensions, self._target_size, self._overlap
            )
        finally:
            # Release the native slide handle even when slicing fails.
            slide.close()
        sample["slice_num"] = patch_num

        file_name = sample[self.filename_key]
        logger.info(f"fileName: {file_name}, method: CoordinateSlider costs {(time.time() - start):6f} s")

        return [sample]

    def auto_simple_slicer(
            self,
            original_sample: Dict[str, Any],
            slide: OpenSlide,
            dimensions: Tuple[int, int],
            target_size: Tuple[int, int],
            overlap: float
    ) -> int:
        """Save every full-size patch of the grid defined by ``target_size`` and ``overlap``.

        Only patches that fit entirely inside ``dimensions`` are produced, so a
        partial strip at the right or bottom edge is not covered.

        Args:
            original_sample: Sample that is deep-copied for each patch.
            slide: Open slide to read regions from (level 0).
            dimensions: ``(width, height)`` of the slide.
            target_size: ``(width, height)`` of each patch.
            overlap: Fraction of a patch shared with its neighbour.

        Returns:
            The number of patches saved.
        """
        w, h = target_size
        # int() truncates, so a small patch or an overlap close to 1 could yield a
        # zero stride, which range() rejects; always advance by at least one pixel.
        stride_x = max(1, int(w * (1 - overlap)))
        stride_y = max(1, int(h * (1 - overlap)))

        patch_no = 0
        for x, y in itertools.product(
                range(0, dimensions[0] - w + 1, stride_x),
                range(0, dimensions[1] - h + 1, stride_y)
        ):
            # Read the tile and drop the alpha channel.
            region = slide.read_region((x, y), 0, (w, h))
            region_np = np.array(region.convert("RGB"))

            patch_sample = copy.deepcopy(original_sample)
            patch_sample[self.data_key] = bytes_transform.numpy_to_bytes(region_np, ".png")
            patch_no += 1
            self.save_patch_sample(patch_sample, patch_no, save_format="image")

        logger.info(f"One image sliced into pieces: {patch_no}")

        return patch_no
|
||||
Reference in New Issue
Block a user