You've already forked DataMate
feature: add unstructured xlsx/xls/csv/pptx/ppt (#41)
* feature: add UnstructuredFormatter
* feature: add UnstructuredFormatter in db
* feature: add unstructured[docx]==0.18.15
* feature: support doc
* feature: add mineru
* feature: add external pdf extract operator by using mineru
* feature: mineru docker install bugfix
* feature: add unstructured xlsx/xls/csv/pptx/ppt

---------

Co-authored-by: Startalker <438747480@qq.com>
This commit is contained in:
@@ -23,6 +23,7 @@ class FileExporter(Mapper):
|
|||||||
super(FileExporter, self).__init__(*args, **kwargs)
|
super(FileExporter, self).__init__(*args, **kwargs)
|
||||||
self.last_ops = True
|
self.last_ops = True
|
||||||
self.text_support_ext = kwargs.get("text_support_ext", ['txt', 'html', 'md', 'markdown',
|
self.text_support_ext = kwargs.get("text_support_ext", ['txt', 'html', 'md', 'markdown',
|
||||||
|
'xlsx', 'xls', 'csv', 'pptx', 'ppt',
|
||||||
'xml', 'json', 'doc', 'docx', 'pdf'])
|
'xml', 'json', 'doc', 'docx', 'pdf'])
|
||||||
self.data_support_ext = kwargs.get("data_support_ext", ['jpg', 'jpeg', 'png', 'bmp'])
|
self.data_support_ext = kwargs.get("data_support_ext", ['jpg', 'jpeg', 'png', 'bmp'])
|
||||||
self.medical_support_ext = kwargs.get("medical_support_ext", ['svs', 'tif', 'tiff'])
|
self.medical_support_ext = kwargs.get("medical_support_ext", ['svs', 'tif', 'tiff'])
|
||||||
|
|||||||
@@ -19,4 +19,4 @@ xmltodict==1.0.2
|
|||||||
zhconv==1.4.3
|
zhconv==1.4.3
|
||||||
sqlalchemy==2.0.40
|
sqlalchemy==2.0.40
|
||||||
pymysql==1.1.1
|
pymysql==1.1.1
|
||||||
unstructured[docx]==0.18.15
|
unstructured[docx,csv,xlsx,pptx]==0.18.15
|
||||||
@@ -8,7 +8,7 @@ COPY scripts/images/runtime/start.sh /opt/runtime/start.sh
|
|||||||
ENV PYTHONPATH=/opt/runtime/datamate/
|
ENV PYTHONPATH=/opt/runtime/datamate/
|
||||||
|
|
||||||
RUN apt update \
|
RUN apt update \
|
||||||
&& apt install -y libgl1 libglib2.0-0 vim poppler-utils tesseract-ocr tesseract-ocr-chi-sim libmagic1t64 libreoffice\
|
&& apt install -y libgl1 libglib2.0-0 vim libmagic1t64 libreoffice\
|
||||||
&& apt clean \
|
&& apt clean \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user