# One requirement per line (PEP 508). Do not end lines with a comma: that is
# list syntax from pyproject.toml and is not part of the requirement grammar.
data-prep-toolkit>=0.2.1
bs4==0.0.2
# pdf2parquet
# docling-core==1.2.0 is kept disabled here: it conflicts with the
# docling-core==1.3.0 pin in the "Doc chunking" section below.
#docling-core==1.2.0
docling-ibm-models==1.1.7
deepsearch-glm==0.21.0
docling==1.11.0
filetype>=1.2.0,<2.0.0
# Doc chunking
docling-core==1.3.0
llama-index-core>=0.11.0,<0.12.0
duckdb==0.10.1
fasttext==0.9.2
huggingface-hub>=0.21.4,<1.0.0
langcodes==3.3.0
mmh3==4.1.0
numpy==1.26.4
# Single pandas entry; the lower bound was previously declared separately in
# the PII-redactor section alongside an unconstrained duplicate.
pandas>=2.2.2
parameterized
scancode-toolkit==32.1.0 ; platform_system != 'Darwin'
sentence-transformers==3.0.1
transformers==4.38.2
xxhash==3.4.1
# PII-redactor (its pandas>=2.2.2 requirement is the pandas entry above)
presidio-analyzer>=2.2.355
presidio-anonymizer>=2.2.355
flair>=0.14.0


