[Feature] Refactor project to use 'datamate' naming convention for services and configurations (#14)

* Enhance CleaningTaskService to track cleaning process progress and update ExecutorType to DATAMATE

* Refactor project to use 'datamate' naming convention for services and configurations
This commit is contained in:
hhhhsc701
2025-10-22 17:53:16 +08:00
committed by GitHub
parent 175d9ded93
commit 31ef8bc265
39 changed files with 312 additions and 737 deletions

View File

@@ -111,113 +111,30 @@ VALUES ('TextFormatter', 'TXT文本抽取', '抽取TXT中的文本。', '1.0.0',
INSERT IGNORE INTO t_operator_category_relation(category_id, operator_id)
VALUES (3, 'TextFormatter'),
(7, 'FileExporter'),
(8, 'TextFormatter'),
(8, 'FileExporter'),
(3, 'FileWithShortOrLongLengthFilter'),
(3, 'FileWithHighRepeatPhraseRateFilter'),
(3, 'FileWithHighRepeatWordRateFilter'),
(3, 'FileWithHighSpecialCharRateFilter'),
(3, 'FileWithManySensitiveWordsFilter'),
(3, 'DuplicateFilesFilter'),
(3, 'DuplicateSentencesFilter'),
(3, 'AnonymizedCreditCardNumber'),
(3, 'AnonymizedIdNumber'),
(3, 'AnonymizedIpAddress'),
(3, 'AnonymizedPhoneNumber'),
(3, 'AnonymizedUrlCleaner'),
(3, 'HtmlTagCleaner'),
(3, 'XMLTagCleaner'),
(3, 'ContentCleaner'),
(3, 'EmailNumberCleaner'),
(3, 'EmojiCleaner'),
(3, 'ExtraSpaceCleaner'),
(3, 'FullWidthCharacterCleaner'),
(3, 'GrableCharactersCleaner'),
(3, 'InvisibleCharactersCleaner'),
(3, 'LegendCleaner'),
(3, 'PoliticalWordCleaner'),
(3, 'SexualAndViolentWordCleaner'),
(3, 'TraditionalChineseCleaner'),
(3, 'UnicodeSpaceCleaner'),
(4, 'ImgFormatter'),
(4, 'ImgBlurredImagesCleaner'),
(4, 'ImgBrightness'),
(4, 'ImgContrast'),
(4, 'ImgDenoise'),
(4, 'ImgDuplicatedImagesCleaner'),
(4, 'ImgPerspectiveTransformation'),
(4, 'ImgResize'),
(4, 'ImgSaturation'),
(4, 'ImgShadowRemove'),
(4, 'ImgSharpness'),
(4, 'ImgSimilarImagesCleaner'),
(4, 'ImgTypeUnify'),
(8, 'FileWithShortOrLongLengthFilter'),
(8, 'FileWithHighRepeatPhraseRateFilter'),
(8, 'FileWithHighRepeatWordRateFilter'),
(8, 'FileWithHighSpecialCharRateFilter'),
(8, 'FileWithManySensitiveWordsFilter'),
(8, 'DuplicateFilesFilter'),
(8, 'DuplicateSentencesFilter'),
(8, 'AnonymizedCreditCardNumber'),
(8, 'AnonymizedIdNumber'),
(8, 'AnonymizedIpAddress'),
(8, 'AnonymizedPhoneNumber'),
(8, 'AnonymizedUrlCleaner'),
(8, 'HtmlTagCleaner'),
(8, 'XMLTagCleaner'),
(8, 'ContentCleaner'),
(8, 'EmailNumberCleaner'),
(8, 'EmojiCleaner'),
(8, 'ExtraSpaceCleaner'),
(8, 'FullWidthCharacterCleaner'),
(8, 'GrableCharactersCleaner'),
(8, 'InvisibleCharactersCleaner'),
(8, 'LegendCleaner'),
(8, 'PoliticalWordCleaner'),
(8, 'SexualAndViolentWordCleaner'),
(8, 'TraditionalChineseCleaner'),
(8, 'UnicodeSpaceCleaner'),
(11, 'TextFormatter'),
(11, 'FileExporter'),
(11, 'FileWithShortOrLongLengthFilter'),
(11, 'FileWithHighRepeatPhraseRateFilter'),
(11, 'FileWithHighRepeatWordRateFilter'),
(11, 'FileWithHighSpecialCharRateFilter'),
(11, 'FileWithManySensitiveWordsFilter'),
(11, 'DuplicateFilesFilter'),
(11, 'DuplicateSentencesFilter'),
(11, 'AnonymizedCreditCardNumber'),
(11, 'AnonymizedIdNumber'),
(11, 'AnonymizedIpAddress'),
(11, 'AnonymizedPhoneNumber'),
(11, 'AnonymizedUrlCleaner'),
(11, 'HtmlTagCleaner'),
(11, 'XMLTagCleaner'),
(11, 'ContentCleaner'),
(11, 'EmailNumberCleaner'),
(11, 'EmojiCleaner'),
(11, 'ExtraSpaceCleaner'),
(11, 'FullWidthCharacterCleaner'),
(11, 'GrableCharactersCleaner'),
(11, 'InvisibleCharactersCleaner'),
(11, 'LegendCleaner'),
(11, 'PoliticalWordCleaner'),
(11, 'SexualAndViolentWordCleaner'),
(11, 'TraditionalChineseCleaner'),
(11, 'UnicodeSpaceCleaner'),
(11, 'ImgFormatter'),
(11, 'ImgBlurredImagesCleaner'),
(11, 'ImgBrightness'),
(11, 'ImgContrast'),
(11, 'ImgDenoise'),
(11, 'ImgDuplicatedImagesCleaner'),
(11, 'ImgPerspectiveTransformation'),
(11, 'ImgResize'),
(11, 'ImgSaturation'),
(11, 'ImgShadowRemove'),
(11, 'ImgSharpness'),
(11, 'ImgSimilarImagesCleaner'),
(11, 'ImgTypeUnify');
SELECT c.id, o.id
FROM t_operator_category c
CROSS JOIN t_operator o
WHERE c.id IN (3, 8, 11)
AND o.id IN ('TextFormatter', 'FileWithShortOrLongLengthFilter', 'FileWithHighRepeatPhraseRateFilter',
'FileWithHighRepeatWordRateFilter', 'FileWithHighSpecialCharRateFilter', 'FileWithManySensitiveWordsFilter',
'DuplicateFilesFilter', 'DuplicateSentencesFilter', 'AnonymizedCreditCardNumber', 'AnonymizedIdNumber',
'AnonymizedIpAddress', 'AnonymizedPhoneNumber', 'AnonymizedUrlCleaner', 'HtmlTagCleaner', 'XMLTagCleaner',
'ContentCleaner', 'EmailNumberCleaner', 'EmojiCleaner', 'ExtraSpaceCleaner', 'FullWidthCharacterCleaner',
'GrableCharactersCleaner', 'InvisibleCharactersCleaner', 'LegendCleaner', 'PoliticalWordCleaner',
'SexualAndViolentWordCleaner', 'TraditionalChineseCleaner', 'UnicodeSpaceCleaner');
INSERT IGNORE INTO t_operator_category_relation(category_id, operator_id)
SELECT c.id, o.id
FROM t_operator_category c
CROSS JOIN t_operator o
WHERE c.id IN (4, 8, 11)
AND o.id IN ('ImgFormatter', 'ImgBlurredImagesCleaner', 'ImgBrightness', 'ImgContrast', 'ImgDenoise',
'ImgDuplicatedImagesCleaner', 'ImgPerspectiveTransformation', 'ImgResize', 'ImgSaturation',
'ImgShadowRemove', 'ImgSharpness', 'ImgSimilarImagesCleaner', 'ImgTypeUnify');
INSERT IGNORE INTO t_operator_category_relation(category_id, operator_id)
SELECT c.id, o.id
FROM t_operator_category c
CROSS JOIN t_operator o
WHERE c.id IN (7, 8, 11)
AND o.id IN ('FileExporter');