if (VLLM_GPU_LANG STREQUAL "CUDA") - cuda_archs_loose_intersection(FA2_ARCHS "8.0;9.0" "${CUDA_ARCHS}") + cuda_archs_loose_intersection(FA2_ARCHS "${CUDA_ARCHS}" "${CUDA_ARCHS}") message(STATUS "FA2_ARCHS: ${FA2_ARCHS}") set_gencode_flags_for_srcs( @@ -191,7 +191,7 @@...
1. 解释TORCH_CUDA_ARCH_LIST变量的作用 TORCH_CUDA_ARCH_LIST是一个环境变量(变量名区分大小写,应使用全大写形式),用于指定PyTorch在编译CUDA扩展(如自定义CUDA操作或层)时应支持的CUDA架构(即计算能力版本,例如 TORCH_CUDA_ARCH_LIST="8.0 8.6 9.0")。CUDA架构(计算能力)是NVIDIA GPU的一个特性,决定了GPU支持哪些CUDA指令集和特性。通过设置这个环境变量,开发者可以确保他们的CUDA扩展与特定架构的GPU兼容。
# Only build Marlin kernels if we are building for at least some compatible archs. # Keep building Marlin for 9.0 as there are some group sizes and shapes that # are not supported by Machete yet. cuda_archs_loose_intersection(MARLIN_ARCHS "8.0;8.6;8.9;9.0" ${CUDA_ARCHS}) if (MARLIN_...
static bool cv::cuda::TargetArchs::hasBin ( int major, int minor ) static Python: retval = cv.cuda.TargetArchs_hasBin( major, minor ) hasEqualOrGreater() static bool cv::cuda::TargetArchs::hasEqualOrGreater ( int major, int minor ) static Python: retval = cv.cuda.TargetArchs...