.gitignore
.pre-commit-config.yaml
README.md
example_eval.yaml
llava_repr_requirements.txt
pyproject.toml
setup.py
.github/issue_template.md
.github/pull_request_template.md
.github/workflows/black.yml
docs/README.md
docs/commands.md
docs/model_guide.md
docs/task_guide.md
lmms_eval/__init__.py
lmms_eval/__main__.py
lmms_eval/evaluator.py
lmms_eval/logging_utils.py
lmms_eval/utils.py
lmms_eval.egg-info/PKG-INFO
lmms_eval.egg-info/SOURCES.txt
lmms_eval.egg-info/dependency_links.txt
lmms_eval.egg-info/entry_points.txt
lmms_eval.egg-info/requires.txt
lmms_eval.egg-info/top_level.txt
lmms_eval/api/__init__.py
lmms_eval/api/filter.py
lmms_eval/api/instance.py
lmms_eval/api/metrics.py
lmms_eval/api/model.py
lmms_eval/api/registry.py
lmms_eval/api/samplers.py
lmms_eval/api/task.py
lmms_eval/filters/__init__.py
lmms_eval/filters/decontamination.py
lmms_eval/filters/extraction.py
lmms_eval/filters/selection.py
lmms_eval/filters/transformation.py
lmms_eval/models/__init__.py
lmms_eval/models/fuyu.py
lmms_eval/models/gpt4v.py
lmms_eval/models/instructblip.py
lmms_eval/models/llava.py
lmms_eval/models/minicpm_v.py
lmms_eval/models/qwen_vl.py
lmms_eval/models/model_utils/__init__.py
lmms_eval/models/model_utils/qwen/qwen_generate_utils.py
lmms_eval/tasks/__init__.py
lmms_eval/tasks/__pycache__/__init__.cpython-311.pyc
lmms_eval/tasks/_task_utils/file_utils.py
lmms_eval/tasks/_task_utils/gpt_eval_utils.py
lmms_eval/tasks/_task_utils/vqa_eval_metric.py
lmms_eval/tasks/_task_utils/__pycache__/file_utils.cpython-311.pyc
lmms_eval/tasks/_task_utils/__pycache__/vqa_eval_metric.cpython-311.pyc
lmms_eval/tasks/ai2d/ai2d.yaml
lmms_eval/tasks/ai2d/upload_ai2d.py
lmms_eval/tasks/ai2d/utils.py
lmms_eval/tasks/ai2d/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/chartqa/chartqa.yaml
lmms_eval/tasks/chartqa/upload_chartqa.py
lmms_eval/tasks/chartqa/utils.py
lmms_eval/tasks/chartqa/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/cmmmu/_cmmmu.yaml
lmms_eval/tasks/cmmmu/_default_template_cmmmu_yaml
lmms_eval/tasks/cmmmu/cmmmu_test.yaml
lmms_eval/tasks/cmmmu/cmmmu_val.yaml
lmms_eval/tasks/cmmmu/utils.py
lmms_eval/tasks/cmmmu/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/coco_cap/coco2014_cap.yaml
lmms_eval/tasks/coco_cap/coco2014_cap_test.yaml
lmms_eval/tasks/coco_cap/coco2014_cap_val.yaml
lmms_eval/tasks/coco_cap/coco2017_cap.yaml
lmms_eval/tasks/coco_cap/coco2017_cap_test.yaml
lmms_eval/tasks/coco_cap/coco2017_cap_val.yaml
lmms_eval/tasks/coco_cap/coco_cap.yaml
lmms_eval/tasks/coco_cap/utils.py
lmms_eval/tasks/coco_cap/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/docvqa/_default_template_docvqa_yaml
lmms_eval/tasks/docvqa/docvqa.yaml
lmms_eval/tasks/docvqa/docvqa_test.yaml
lmms_eval/tasks/docvqa/docvqa_val.yaml
lmms_eval/tasks/docvqa/utils.py
lmms_eval/tasks/docvqa/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/ferret/ferret.yaml
lmms_eval/tasks/ferret/rule.json
lmms_eval/tasks/ferret/utils.py
lmms_eval/tasks/ferret/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/flickr30k/flickr30k.yaml
lmms_eval/tasks/flickr30k/flickr30k_test.yaml
lmms_eval/tasks/flickr30k/utils.py
lmms_eval/tasks/flickr30k/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/gqa/gqa.yaml
lmms_eval/tasks/gqa/utils.py
lmms_eval/tasks/gqa/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/hallusion_bench/evaluate_hb.py
lmms_eval/tasks/hallusion_bench/hallusion_bench_image.yaml
lmms_eval/tasks/hallusion_bench/utils.py
lmms_eval/tasks/hallusion_bench/__pycache__/evaluate_hb.cpython-311.pyc
lmms_eval/tasks/hallusion_bench/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/iconqa/_default_template_docvqa_yaml
lmms_eval/tasks/iconqa/iconqa.yaml
lmms_eval/tasks/iconqa/iconqa_test.yaml
lmms_eval/tasks/iconqa/iconqa_val.yaml
lmms_eval/tasks/iconqa/utils.py
lmms_eval/tasks/iconqa/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/infovqa/_default_template_infovqa_yaml
lmms_eval/tasks/infovqa/infovqa.yaml
lmms_eval/tasks/infovqa/infovqa_test.yaml
lmms_eval/tasks/infovqa/infovqa_val.yaml
lmms_eval/tasks/infovqa/utils.py
lmms_eval/tasks/infovqa/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/internal_eval/__pycache__/d170_cn_utils.cpython-311.pyc
lmms_eval/tasks/internal_eval/__pycache__/d170_en_utils.cpython-311.pyc
lmms_eval/tasks/internal_eval/__pycache__/dc100_en_utils.cpython-311.pyc
lmms_eval/tasks/internal_eval/__pycache__/dc200_cn_utils.cpython-311.pyc
lmms_eval/tasks/internal_eval/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/llava-bench-coco/llava-bench-coco.yaml
lmms_eval/tasks/llava-bench-coco/rule.json
lmms_eval/tasks/llava-bench-coco/utils.py
lmms_eval/tasks/llava-bench-coco/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/llava-in-the-wild/llava-in-the-wild.yaml
lmms_eval/tasks/llava-in-the-wild/rule.json
lmms_eval/tasks/llava-in-the-wild/utils.py
lmms_eval/tasks/llava-in-the-wild/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/mathvista/mathvista.yaml
lmms_eval/tasks/mathvista/mathvista_evals.py
lmms_eval/tasks/mathvista/mathvista_test.yaml
lmms_eval/tasks/mathvista/mathvista_testmini.yaml
lmms_eval/tasks/mathvista/utils.py
lmms_eval/tasks/mathvista/__pycache__/mathvista_evals.cpython-311.pyc
lmms_eval/tasks/mathvista/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/mmbench/cc_utils.py
lmms_eval/tasks/mmbench/cn_utils.py
lmms_eval/tasks/mmbench/en_utils.py
lmms_eval/tasks/mmbench/mmbench.yaml
lmms_eval/tasks/mmbench/mmbench_cc.yaml
lmms_eval/tasks/mmbench/mmbench_cn.yaml
lmms_eval/tasks/mmbench/mmbench_cn_dev.yaml
lmms_eval/tasks/mmbench/mmbench_cn_test.yaml
lmms_eval/tasks/mmbench/mmbench_en.yaml
lmms_eval/tasks/mmbench/mmbench_en_dev.yaml
lmms_eval/tasks/mmbench/mmbench_en_test.yaml
lmms_eval/tasks/mmbench/mmbench_evals.py
lmms_eval/tasks/mmbench/__pycache__/cc_utils.cpython-311.pyc
lmms_eval/tasks/mmbench/__pycache__/cn_utils.cpython-311.pyc
lmms_eval/tasks/mmbench/__pycache__/en_utils.cpython-311.pyc
lmms_eval/tasks/mmbench/__pycache__/mmbench_evals.cpython-311.pyc
lmms_eval/tasks/mme/mme.yaml
lmms_eval/tasks/mme/utils.py
lmms_eval/tasks/mme/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/mmmu/mmmu.yaml
lmms_eval/tasks/mmmu/mmmu_test.yaml
lmms_eval/tasks/mmmu/mmmu_val.yaml
lmms_eval/tasks/mmmu/utils.py
lmms_eval/tasks/mmmu/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/mmvet/mmvet.yaml
lmms_eval/tasks/mmvet/utils.py
lmms_eval/tasks/mmvet/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/multidocvqa/multidocvqa.yaml
lmms_eval/tasks/multidocvqa/multidocvqa_test.yaml
lmms_eval/tasks/multidocvqa/multidocvqa_val.yaml
lmms_eval/tasks/multidocvqa/utils.py
lmms_eval/tasks/multidocvqa/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/nocaps/_default_template_nocaps_yaml
lmms_eval/tasks/nocaps/nocaps.yaml
lmms_eval/tasks/nocaps/nocaps_test.yaml
lmms_eval/tasks/nocaps/nocaps_val.yaml
lmms_eval/tasks/nocaps/utils.py
lmms_eval/tasks/nocaps/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/ok_vqa/_default_template_vqa_yaml
lmms_eval/tasks/ok_vqa/_generate_config.py
lmms_eval/tasks/ok_vqa/_ok_vqa.yaml
lmms_eval/tasks/ok_vqa/ok_vqa_val2014.yaml
lmms_eval/tasks/ok_vqa/utils.py
lmms_eval/tasks/ok_vqa/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/pope/pope.yaml
lmms_eval/tasks/pope/utils.py
lmms_eval/tasks/pope/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/refcoco/_default_template_bbox_yaml
lmms_eval/tasks/refcoco/_default_template_seg_yaml
lmms_eval/tasks/refcoco/_generate_config.py
lmms_eval/tasks/refcoco/_refcoco.yaml
lmms_eval/tasks/refcoco/refcoco_bbox_test.yaml
lmms_eval/tasks/refcoco/refcoco_bbox_testA.yaml
lmms_eval/tasks/refcoco/refcoco_bbox_testB.yaml
lmms_eval/tasks/refcoco/refcoco_bbox_val.yaml
lmms_eval/tasks/refcoco/refcoco_seg_test.yaml
lmms_eval/tasks/refcoco/refcoco_seg_testA.yaml
lmms_eval/tasks/refcoco/refcoco_seg_testB.yaml
lmms_eval/tasks/refcoco/refcoco_seg_val.yaml
lmms_eval/tasks/refcoco/utils.py
lmms_eval/tasks/refcoco+/_default_template_bbox_yaml
lmms_eval/tasks/refcoco+/_default_template_seg_yaml
lmms_eval/tasks/refcoco+/_generate_config.py
lmms_eval/tasks/refcoco+/_refcoco.yaml
lmms_eval/tasks/refcoco+/refcoco+_bbox_testA.yaml
lmms_eval/tasks/refcoco+/refcoco+_bbox_testB.yaml
lmms_eval/tasks/refcoco+/refcoco+_bbox_val.yaml
lmms_eval/tasks/refcoco+/refcoco+_seg_testA.yaml
lmms_eval/tasks/refcoco+/refcoco+_seg_testB.yaml
lmms_eval/tasks/refcoco+/refcoco+_seg_val.yaml
lmms_eval/tasks/refcoco+/utils.py
lmms_eval/tasks/refcoco+/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/refcoco/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/refcocog/_default_template_bbox_yaml
lmms_eval/tasks/refcocog/_default_template_seg_yaml
lmms_eval/tasks/refcocog/_generate_config.py
lmms_eval/tasks/refcocog/_refcoco.yaml
lmms_eval/tasks/refcocog/refcocog_bbox_test.yaml
lmms_eval/tasks/refcocog/refcocog_bbox_val.yaml
lmms_eval/tasks/refcocog/refcocog_seg_test.yaml
lmms_eval/tasks/refcocog/refcocog_seg_val.yaml
lmms_eval/tasks/refcocog/utils.py
lmms_eval/tasks/refcocog/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/scienceqa/scienceqa.yaml
lmms_eval/tasks/scienceqa/scienceqa_full.yaml
lmms_eval/tasks/scienceqa/scienceqa_img.yaml
lmms_eval/tasks/scienceqa/utils.py
lmms_eval/tasks/scienceqa/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/seedbench/seedbench.yaml
lmms_eval/tasks/seedbench/seedbench_ppl.yaml
lmms_eval/tasks/seedbench/utils.py
lmms_eval/tasks/seedbench/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/seedbench_2/seedbench_2.yaml
lmms_eval/tasks/seedbench_2/utils.py
lmms_eval/tasks/seedbench_2/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/stvqa/stvqa.yaml
lmms_eval/tasks/stvqa/utils.py
lmms_eval/tasks/stvqa/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/textcaps/_default_template_textcaps_yaml
lmms_eval/tasks/textcaps/textcaps.yaml
lmms_eval/tasks/textcaps/textcaps_test.yaml
lmms_eval/tasks/textcaps/textcaps_train.yaml
lmms_eval/tasks/textcaps/textcaps_val.yaml
lmms_eval/tasks/textcaps/utils.py
lmms_eval/tasks/textcaps/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/textvqa/_default_template_textvqa_yaml
lmms_eval/tasks/textvqa/_textvqa.yaml
lmms_eval/tasks/textvqa/textvqa_test.yaml
lmms_eval/tasks/textvqa/textvqa_val.yaml
lmms_eval/tasks/textvqa/utils.py
lmms_eval/tasks/textvqa/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/vizwiz_vqa/_default_template_vqa_yaml
lmms_eval/tasks/vizwiz_vqa/_generate_config.py
lmms_eval/tasks/vizwiz_vqa/_vizwiz_vqa.yaml
lmms_eval/tasks/vizwiz_vqa/utils.py
lmms_eval/tasks/vizwiz_vqa/vizwiz_vqa_test.yaml
lmms_eval/tasks/vizwiz_vqa/vizwiz_vqa_val.yaml
lmms_eval/tasks/vizwiz_vqa/__pycache__/utils.cpython-311.pyc
lmms_eval/tasks/vqav2/_default_template_vqav2_yaml
lmms_eval/tasks/vqav2/_vqav2.yaml
lmms_eval/tasks/vqav2/utils.py
lmms_eval/tasks/vqav2/vqav2_test.yaml
lmms_eval/tasks/vqav2/vqav2_val.yaml
lmms_eval/tasks/vqav2/__pycache__/utils.cpython-311.pyc
miscs/llava_result_check.md
miscs/repr_scripts.sh
miscs/repr_torch_envs.txt
miscs/scienceqa_id.txt
miscs/script.sh
miscs/test_llava.py
miscs/test_scienceqa.py
tools/make_hf_dataset.ipynb