AUTHORS
LICENSE
MANIFEST.in
README.md
pyproject.toml
setup.py
csrc/flash_attn/fmha_api.cpp
csrc/flash_attn/src/fmha.h
csrc/flash_attn/src/fmha_block_dgrad_fp16_kernel_loop.sm80.cu
csrc/flash_attn/src/fmha_block_dgrad_kernel_1xN_loop.h
csrc/flash_attn/src/fmha_block_fprop_fp16_kernel.sm80.cu
csrc/flash_attn/src/fmha_block_fprop_kernel_1xN.h
csrc/flash_attn/src/fmha_blockmask.h
csrc/flash_attn/src/fmha_bwd_hdim128.cu
csrc/flash_attn/src/fmha_bwd_hdim32.cu
csrc/flash_attn/src/fmha_bwd_hdim64.cu
csrc/flash_attn/src/fmha_bwd_launch_template.h
csrc/flash_attn/src/fmha_dgrad_kernel_1xN_loop.h
csrc/flash_attn/src/fmha_fprop_kernel_1xN.h
csrc/flash_attn/src/fmha_fwd_hdim128.cu
csrc/flash_attn/src/fmha_fwd_hdim32.cu
csrc/flash_attn/src/fmha_fwd_hdim64.cu
csrc/flash_attn/src/fmha_fwd_launch_template.h
csrc/flash_attn/src/fmha_kernel.h
csrc/flash_attn/src/fmha_utils.h
csrc/flash_attn/src/philox.cuh
csrc/flash_attn/src/static_switch.h
csrc/flash_attn/src/fmha/gemm.h
csrc/flash_attn/src/fmha/gmem_tile.h
csrc/flash_attn/src/fmha/kernel_traits.h
csrc/flash_attn/src/fmha/mask.h
csrc/flash_attn/src/fmha/smem_tile.h
csrc/flash_attn/src/fmha/softmax.h
csrc/flash_attn/src/fmha/utils.h
csrc/ft_attention/cuda_bf16_fallbacks.cuh
csrc/ft_attention/cuda_bf16_wrapper.h
csrc/ft_attention/decoder_masked_multihead_attention.cu
csrc/ft_attention/decoder_masked_multihead_attention.h
csrc/ft_attention/decoder_masked_multihead_attention_utils.h
csrc/ft_attention/ft_attention.cpp
csrc/fused_dense_lib/fused_dense.cpp
csrc/fused_dense_lib/fused_dense_cuda.cu
csrc/fused_softmax/fused_softmax.cpp
csrc/fused_softmax/scaled_masked_softmax.h
csrc/fused_softmax/scaled_masked_softmax_cuda.cu
csrc/fused_softmax/scaled_upper_triang_masked_softmax.h
csrc/fused_softmax/scaled_upper_triang_masked_softmax_cuda.cu
csrc/fused_softmax/type_shim.h
csrc/layer_norm/ln.h
csrc/layer_norm/ln_api.cpp
csrc/layer_norm/ln_bwd_1024.cu
csrc/layer_norm/ln_bwd_1280.cu
csrc/layer_norm/ln_bwd_1536.cu
csrc/layer_norm/ln_bwd_2048.cu
csrc/layer_norm/ln_bwd_256.cu
csrc/layer_norm/ln_bwd_2560.cu
csrc/layer_norm/ln_bwd_3072.cu
csrc/layer_norm/ln_bwd_4096.cu
csrc/layer_norm/ln_bwd_512.cu
csrc/layer_norm/ln_bwd_5120.cu
csrc/layer_norm/ln_bwd_6144.cu
csrc/layer_norm/ln_bwd_7168.cu
csrc/layer_norm/ln_bwd_768.cu
csrc/layer_norm/ln_bwd_8192.cu
csrc/layer_norm/ln_bwd_kernels.cuh
csrc/layer_norm/ln_fwd_1024.cu
csrc/layer_norm/ln_fwd_1280.cu
csrc/layer_norm/ln_fwd_1536.cu
csrc/layer_norm/ln_fwd_2048.cu
csrc/layer_norm/ln_fwd_256.cu
csrc/layer_norm/ln_fwd_2560.cu
csrc/layer_norm/ln_fwd_3072.cu
csrc/layer_norm/ln_fwd_4096.cu
csrc/layer_norm/ln_fwd_512.cu
csrc/layer_norm/ln_fwd_5120.cu
csrc/layer_norm/ln_fwd_6144.cu
csrc/layer_norm/ln_fwd_7168.cu
csrc/layer_norm/ln_fwd_768.cu
csrc/layer_norm/ln_fwd_8192.cu
csrc/layer_norm/ln_fwd_kernels.cuh
csrc/layer_norm/ln_kernel_traits.h
csrc/layer_norm/ln_parallel_bwd_1024.cu
csrc/layer_norm/ln_parallel_bwd_1280.cu
csrc/layer_norm/ln_parallel_bwd_1536.cu
csrc/layer_norm/ln_parallel_bwd_2048.cu
csrc/layer_norm/ln_parallel_bwd_256.cu
csrc/layer_norm/ln_parallel_bwd_2560.cu
csrc/layer_norm/ln_parallel_bwd_3072.cu
csrc/layer_norm/ln_parallel_bwd_4096.cu
csrc/layer_norm/ln_parallel_bwd_512.cu
csrc/layer_norm/ln_parallel_bwd_5120.cu
csrc/layer_norm/ln_parallel_bwd_6144.cu
csrc/layer_norm/ln_parallel_bwd_7168.cu
csrc/layer_norm/ln_parallel_bwd_768.cu
csrc/layer_norm/ln_parallel_bwd_8192.cu
csrc/layer_norm/ln_parallel_fwd_1024.cu
csrc/layer_norm/ln_parallel_fwd_1280.cu
csrc/layer_norm/ln_parallel_fwd_1536.cu
csrc/layer_norm/ln_parallel_fwd_2048.cu
csrc/layer_norm/ln_parallel_fwd_256.cu
csrc/layer_norm/ln_parallel_fwd_2560.cu
csrc/layer_norm/ln_parallel_fwd_3072.cu
csrc/layer_norm/ln_parallel_fwd_4096.cu
csrc/layer_norm/ln_parallel_fwd_512.cu
csrc/layer_norm/ln_parallel_fwd_5120.cu
csrc/layer_norm/ln_parallel_fwd_6144.cu
csrc/layer_norm/ln_parallel_fwd_7168.cu
csrc/layer_norm/ln_parallel_fwd_768.cu
csrc/layer_norm/ln_parallel_fwd_8192.cu
csrc/layer_norm/ln_parallel_residual_bwd_kernels.cuh
csrc/layer_norm/ln_parallel_residual_fwd_kernels.cuh
csrc/layer_norm/ln_utils.cuh
csrc/layer_norm/static_switch.h
csrc/rotary/rotary.cpp
csrc/rotary/rotary_cuda.cu
csrc/xentropy/interface.cpp
csrc/xentropy/xentropy_kernel.cu
flash_attn/__init__.py
flash_attn/bert_padding.py
flash_attn/flash_attention.py
flash_attn/flash_attn_interface.py
flash_attn/flash_attn_triton.py
flash_attn/flash_attn_triton_og.py
flash_attn/flash_blocksparse_attention.py
flash_attn/flash_blocksparse_attn_interface.py
flash_attn/fused_softmax.py
flash_attn/layers/__init__.py
flash_attn/layers/patch_embed.py
flash_attn/layers/rotary.py
flash_attn/losses/__init__.py
flash_attn/losses/cross_entropy.py
flash_attn/models/__init__.py
flash_attn/models/bert.py
flash_attn/models/gpt.py
flash_attn/models/gpt_neox.py
flash_attn/models/gptj.py
flash_attn/models/llama.py
flash_attn/models/opt.py
flash_attn/models/vit.py
flash_attn/modules/__init__.py
flash_attn/modules/block.py
flash_attn/modules/embedding.py
flash_attn/modules/mha.py
flash_attn/modules/mlp.py
flash_attn/ops/__init__.py
flash_attn/ops/activations.py
flash_attn/ops/fused_dense.py
flash_attn/ops/layer_norm.py
flash_attn/ops/rms_norm.py
flash_attn/utils/__init__.py
flash_attn/utils/benchmark.py
flash_attn/utils/distributed.py
flash_attn/utils/generation.py
flash_attn/utils/pretrained.py
flash_attn_wheels.egg-info/PKG-INFO
flash_attn_wheels.egg-info/SOURCES.txt
flash_attn_wheels.egg-info/dependency_links.txt
flash_attn_wheels.egg-info/requires.txt
flash_attn_wheels.egg-info/top_level.txt
tests/test_flash_attn.py
tests/test_rotary.py