LICENSE
README.md
setup.py
LongNet/Transformer.py
LongNet/__init__.py
LongNet/attend.py
LongNet/attention.py
LongNet/model.py
LongNet/training.py
LongNet/utils.py
LongNet.egg-info/PKG-INFO
LongNet.egg-info/SOURCES.txt
LongNet.egg-info/dependency_links.txt
LongNet.egg-info/requires.txt
LongNet.egg-info/top_level.txt
LongNet/iterations/BlocksparseDilatedAttention.py
LongNet/iterations/DilatedAttentionOP.py
LongNet/iterations/DilatedAttentionOld.py
LongNet/iterations/DistributedDilatedAttention.py
LongNet/iterations/DynamicDilatedAttention.py
LongNet/iterations/MultiModal.py
LongNet/iterations/__init__.py
LongNet/iterations/topk.py
LongNet/torchscale/__init__.py
LongNet/torchscale/architecture/__init__.py
LongNet/torchscale/architecture/config.py
LongNet/torchscale/architecture/decoder.py
LongNet/torchscale/architecture/encoder.py
LongNet/torchscale/architecture/encoder_decoder.py
LongNet/torchscale/architecture/utils.py
LongNet/torchscale/component/__init__.py
LongNet/torchscale/component/droppath.py
LongNet/torchscale/component/embedding.py
LongNet/torchscale/component/feedforward_network.py
LongNet/torchscale/component/multihead_attention.py
LongNet/torchscale/component/multiway_network.py
LongNet/torchscale/component/relative_position_bias.py
LongNet/torchscale/component/xpos_relative_position.py
LongNet/torchscale/component/xmoe/__init__.py
LongNet/torchscale/component/xmoe/global_groups.py
LongNet/torchscale/component/xmoe/moe_layer.py
LongNet/torchscale/component/xmoe/routing.py
LongNet/torchscale/model/BEiT3.py
LongNet/torchscale/model/__init__.py
test/test.py