feat: enhance dataset file fetching with improved pagination and document loading support (#156)

This commit is contained in:
Dallas98
2025-12-10 22:39:24 +08:00
committed by GitHub
parent e9fd6a3ae1
commit 2f3ae21f8a
7 changed files with 158 additions and 112 deletions

View File

@@ -32,7 +32,7 @@ RUN --mount=type=cache,target=$POETRY_CACHE_DIR \
poetry install --no-root --only main
# Download NLTK data
# The resources are fetched at image build time into /usr/local/nltk_data,
# so they are already part of the image layer when the container starts.
# NOTE(review): this excerpt looks like a diff hunk whose +/- markers were
# stripped. The first RUN below (punkt_tab only) appears to be the pre-change
# line and the second RUN its replacement, which also fetches
# averaged_perceptron_tagger_eng. Confirm that only the second one is present
# in the real Dockerfile; keeping both would download punkt_tab twice.
RUN python -c "import nltk; nltk.download('punkt_tab', download_dir='/usr/local/nltk_data')"
RUN python -c "import nltk; nltk.download(['punkt_tab','averaged_perceptron_tagger_eng'], download_dir='/usr/local/nltk_data')"
# NLTK_DATA is set to the same directory that was passed as download_dir above.
ENV NLTK_DATA=/usr/local/nltk_data
# Copy the rest of the application