<< " msn"; std::cout << " GFLOPs: " << gflops < "\n"; }; benchmark("Fused GEMM+permute", [&](){ run_mm<false>(gemm_permute); }); benchmark("Unfused GEMM+permute", [&](){ run_referencefalse>(gemm_reference); }); benchmark"Stand GEMM only", [&]()...