Cython>=0.25
h5py>=2.6,!=3.0.0
numpy>=1.15
pandas>=1.0,!=1.1.5
scipy>=0.18
tabulate>=0.7
scikit-learn
tqdm
torch>=1.13.0
torchaudio
torchtext
torchvision
pydantic<2.0
transformers
tifffile
imagecodecs
tokenizers>=0.13.3
spacy>=2.3
PyYAML>=3.12,<6.0.1,!=5.4.* # Exclude PyYAML 5.4.* due to incompatibility with awscli
absl-py
kaggle
requests
fsspec[http]<=2023.10.0
dataclasses-json
jsonschema>=4.5.0,<4.7
marshmallow
marshmallow-jsonschema
marshmallow-dataclass==8.5.4
tensorboard
nltk    # Required for rouge scores.
torchmetrics>=0.11.0
torchinfo
filelock
psutil==5.9.4
protobuf==3.20.3
py-cpuinfo==9.0.0
gpustat
rich~=12.4.4
packaging
retry

# required for TransfoXLTokenizer when using transformer_xl
sacremoses
sentencepiece

# requirements for daft
# NOTE: Pinned to <0.2 because of deprecation of fsspec argument in Daft
getdaft<0.2

# requirement for various paged and 8-bit optimizers
bitsandbytes<0.41.0

# new data format support
xlwt                # excel
xlrd>=2.0.1         # excel
xlsxwriter>=1.4.3   # excel
openpyxl>=3.0.7     # excel
pyxlsb>=1.0.8       # excel
pyarrow             # parquet
lxml                # html
html5lib            # html

# Requirement for loading Hugging Face datasets
datasets
