LICENSE
README.md
setup.py
LongNet/__init__.py
LongNet/attend.py
LongNet/attention.py
LongNet/model.py
LongNet/training.py
LongNet/utils.py
LongNet.egg-info/PKG-INFO
LongNet.egg-info/SOURCES.txt
LongNet.egg-info/dependency_links.txt
LongNet.egg-info/requires.txt
LongNet.egg-info/top_level.txt
LongNet/torchscale/__init__.py
LongNet/torchscale/setup.py
LongNet/torchscale/examples/__init__.py
LongNet/torchscale/examples/fairseq/__init__.py
LongNet/torchscale/examples/fairseq/generate.py
LongNet/torchscale/examples/fairseq/interactive.py
LongNet/torchscale/examples/fairseq/train.py
LongNet/torchscale/examples/fairseq/criterions/__init__.py
LongNet/torchscale/examples/fairseq/criterions/masked_lm_moe.py
LongNet/torchscale/examples/fairseq/models/__init__.py
LongNet/torchscale/examples/fairseq/models/bert.py
LongNet/torchscale/examples/fairseq/models/language_modeling.py
LongNet/torchscale/examples/fairseq/models/machine_translation.py
LongNet/torchscale/examples/fairseq/tasks/__init__.py
LongNet/torchscale/examples/fairseq/tasks/pretraining.py
LongNet/torchscale/examples/fairseq/tasks/data/__init__.py
LongNet/torchscale/examples/fairseq/tasks/data/basic_loader.py
LongNet/torchscale/examples/fairseq/tasks/data/mlm_loader.py
LongNet/torchscale/examples/fairseq/tasks/data/utils.py
LongNet/torchscale/examples/fairseq/utils/__init__.py
LongNet/torchscale/examples/fairseq/utils/sparse_clip.py
LongNet/torchscale/tests/__init__.py
LongNet/torchscale/tests/test_decoder.py
LongNet/torchscale/tests/test_encoder.py
LongNet/torchscale/tests/test_encoder_decoder.py
LongNet/torchscale/torchscale/__init__.py
LongNet/torchscale/torchscale/architecture/__init__.py
LongNet/torchscale/torchscale/architecture/config.py
LongNet/torchscale/torchscale/architecture/decoder.py
LongNet/torchscale/torchscale/architecture/encoder.py
LongNet/torchscale/torchscale/architecture/encoder_decoder.py
LongNet/torchscale/torchscale/architecture/utils.py
LongNet/torchscale/torchscale/component/__init__.py
LongNet/torchscale/torchscale/component/droppath.py
LongNet/torchscale/torchscale/component/embedding.py
LongNet/torchscale/torchscale/component/feedforward_network.py
LongNet/torchscale/torchscale/component/multihead_attention.py
LongNet/torchscale/torchscale/component/multiway_network.py
LongNet/torchscale/torchscale/component/relative_position_bias.py
LongNet/torchscale/torchscale/component/xpos_relative_position.py
LongNet/torchscale/torchscale/component/xmoe/__init__.py
LongNet/torchscale/torchscale/component/xmoe/global_groups.py
LongNet/torchscale/torchscale/component/xmoe/moe_layer.py
LongNet/torchscale/torchscale/component/xmoe/routing.py
LongNet/torchscale/torchscale/model/BEiT3.py
LongNet/torchscale/torchscale/model/__init__.py
flash_attn/__init__.py
flash_attn/setup.py
flash_attn/flash_attn/__init__.py
flash_attn/flash_attn/bert_padding.py
flash_attn/flash_attn/flash_attention.py
flash_attn/flash_attn/flash_attn_interface.py
flash_attn/flash_attn/flash_attn_triton.py
flash_attn/flash_attn/flash_attn_triton_og.py
flash_attn/flash_attn/flash_blocksparse_attention.py
flash_attn/flash_attn/flash_blocksparse_attn_interface.py
flash_attn/flash_attn/fused_softmax.py
flash_attn/flash_attn/layers/__init__.py
flash_attn/flash_attn/layers/patch_embed.py
flash_attn/flash_attn/layers/rotary.py
flash_attn/flash_attn/losses/__init__.py
flash_attn/flash_attn/losses/cross_entropy.py
flash_attn/flash_attn/models/__init__.py
flash_attn/flash_attn/models/bert.py
flash_attn/flash_attn/models/gpt.py
flash_attn/flash_attn/models/gpt_neox.py
flash_attn/flash_attn/models/gptj.py
flash_attn/flash_attn/models/llama.py
flash_attn/flash_attn/models/opt.py
flash_attn/flash_attn/models/vit.py
flash_attn/flash_attn/modules/__init__.py
flash_attn/flash_attn/modules/block.py
flash_attn/flash_attn/modules/embedding.py
flash_attn/flash_attn/modules/mha.py
flash_attn/flash_attn/modules/mlp.py
flash_attn/flash_attn/ops/__init__.py
flash_attn/flash_attn/ops/activations.py
flash_attn/flash_attn/ops/fused_dense.py
flash_attn/flash_attn/ops/layer_norm.py
flash_attn/flash_attn/ops/rms_norm.py
flash_attn/flash_attn/utils/__init__.py
flash_attn/flash_attn/utils/benchmark.py
flash_attn/flash_attn/utils/distributed.py
flash_attn/flash_attn/utils/generation.py
flash_attn/flash_attn/utils/pretrained.py
test/test.py