LICENSE
MANIFEST.in
README.md
convert.py
requirements.txt
setup.cfg
setup.py
test_inference.py
.github/FUNDING.yml
conversion/adaptivegptq.py
conversion/compile.py
conversion/optimize.py
conversion/qparams.py
conversion/quantize.py
conversion/tokenize.py
doc/codellama_13b_instruct.png
doc/codellama_13b_instruct_thumb.png
doc/llama2_70b_chat.png
doc/llama2_70b_chat_thumb.png
examples/chat.py
examples/inference.py
examples/speculative.py
examples/streaming.py
exllamav2/__init__.py
exllamav2/attn.py
exllamav2/cache.py
exllamav2/config.py
exllamav2/embedding.py
exllamav2/ext.py
exllamav2/linear.py
exllamav2/mlp.py
exllamav2/model.py
exllamav2/model_init.py
exllamav2/module.py
exllamav2/rmsnorm.py
exllamav2/tokenizer.py
exllamav2/util.py
exllamav2.egg-info/PKG-INFO
exllamav2.egg-info/SOURCES.txt
exllamav2.egg-info/dependency_links.txt
exllamav2.egg-info/requires.txt
exllamav2.egg-info/top_level.txt
exllamav2/exllamav2_ext/config.h
exllamav2/exllamav2_ext/ext.cpp
exllamav2/exllamav2_ext/cpp/quantize_func.cpp
exllamav2/exllamav2_ext/cpp/quantize_func.h
exllamav2/exllamav2_ext/cpp/sampling.cpp
exllamav2/exllamav2_ext/cpp/sampling.h
exllamav2/exllamav2_ext/cpp/util.h
exllamav2/exllamav2_ext/cuda/compat.cuh
exllamav2/exllamav2_ext/cuda/matrix_view.cuh
exllamav2/exllamav2_ext/cuda/pack_tensor.cu
exllamav2/exllamav2_ext/cuda/pack_tensor.cuh
exllamav2/exllamav2_ext/cuda/q_attn.cu
exllamav2/exllamav2_ext/cuda/q_attn.cuh
exllamav2/exllamav2_ext/cuda/q_gemm.cu
exllamav2/exllamav2_ext/cuda/q_gemm.cuh
exllamav2/exllamav2_ext/cuda/q_gemm_kernel.cuh
exllamav2/exllamav2_ext/cuda/q_gemm_kernel_gptq.cuh
exllamav2/exllamav2_ext/cuda/q_matrix.cu
exllamav2/exllamav2_ext/cuda/q_matrix.cuh
exllamav2/exllamav2_ext/cuda/q_mlp.cu
exllamav2/exllamav2_ext/cuda/q_mlp.cuh
exllamav2/exllamav2_ext/cuda/quantize.cu
exllamav2/exllamav2_ext/cuda/quantize.cuh
exllamav2/exllamav2_ext/cuda/rms_norm.cu
exllamav2/exllamav2_ext/cuda/rms_norm.cuh
exllamav2/exllamav2_ext/cuda/rope.cu
exllamav2/exllamav2_ext/cuda/rope.cuh
exllamav2/exllamav2_ext/cuda/util.cuh
exllamav2/exllamav2_ext/cuda/quant/qdq_2.cuh
exllamav2/exllamav2_ext/cuda/quant/qdq_3.cuh
exllamav2/exllamav2_ext/cuda/quant/qdq_4.cuh
exllamav2/exllamav2_ext/cuda/quant/qdq_5.cuh
exllamav2/exllamav2_ext/cuda/quant/qdq_6.cuh
exllamav2/exllamav2_ext/cuda/quant/qdq_8.cuh
exllamav2/exllamav2_ext/cuda/quant/qdq_util.cuh
exllamav2/generator/__init__.py
exllamav2/generator/base.py
exllamav2/generator/sampler.py
exllamav2/generator/speculative.py
exllamav2/generator/streaming.py
tests/test_alloc.py
tests/test_gemv.py
util/convert_safetensors.py
util/shard.py
util/unshard.py