python evaluate_mmlu.py -d data/mmlu/data/ # Qwen-7B-Chat # Qwen-7B-Chat (We only provide 0-shot reproduction scripts. 5-shot results are obtained by OpenCompass (https://github.com/InternLM/opencompass).) pip install thefuzz python evaluate_chat_mmlu.py -d data/mmlu/data/ ``` @...
cal_mmlu(dev_result) @@ -308,6 +320,12 @@ def main(args): group.add_argument( "--debug", action="store_true", default=False, help="Print infos." ) group.add_argument( "--batch-size", type=int, default=1, help="batch size", ) args = parser.parse_args() set_seed(args.se...