conv::ImplicitGemmMode::GEMM_TN, cutlass::arch::CacheOperation::Global, cutlass::arch::CacheOperation::Global>::TransposedPitchLinearThreadMapVec, 4>, cutlass::conv::threadblock::Dwconv2dTileFilterIteratorFpropPrecomp<cutlass::MatrixShape<8, 128>, float, cutlass::layout::TensorNCHW, ...