         torch._C._GLIBCXX_USE_CXX11_ABI = True
     ext_modules.append(
         CUDAExtension(
-            name="flash_attn_2_cuda",
+            name="vllm_flash_attn_2_cuda",
             sources=[
                 "csrc/flash_attn/flash_api.cpp",
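The rename is what lets the vendored build coexist with a stock flash-attn install: the compiled shared object is imported as vllm_flash_attn_2_cuda, so it never shadows (or gets shadowed by) upstream's flash_attn_2_cuda. A minimal caller-side sketch of that assumption, with a purely illustrative helper:

    import importlib

    def load_flash_attn_ext():
        # Prefer the renamed vLLM extension, fall back to upstream's name.
        # Only the two module names come from the patch above; this helper
        # itself is hypothetical.
        for name in ("vllm_flash_attn_2_cuda", "flash_attn_2_cuda"):
            try:
                return importlib.import_module(name)
            except ImportError:
                continue
        raise ImportError("no flash-attn CUDA extension is installed")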
+        PATCH_COMMAND ${patch_vllm_flash_attn}
+        UPDATE_DISCONNECTED 1
   )
 else()
   FetchContent_Declare(
@@ -585,6 +589,8 @@ else()
         GIT_PROGRESS TRUE
         # Don't share the vllm-flash-attn build between build types
         BINARY_DIR ${CMAKE_BINARY_DIR}/vllm-flash-attn
+        PATCH_COMMAND ${patch_vllm_flash_attn}
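In CMake terms, PATCH_COMMAND runs once inside the freshly fetched source tree, and UPDATE_DISCONNECTED 1 stops later reconfigures from re-running the update step (and failing while trying to re-apply an already-applied patch). A rough stand-alone equivalent, sketched in Python with hypothetical paths and repository arguments:

    import subprocess
    from pathlib import Path

    def fetch_and_patch(repo_url: str, ref: str, src_dir: Path, patch_file: Path) -> None:
        # Clone the pinned ref once and apply the rename patch in the checkout;
        # skipping the whole step when the directory already exists mirrors
        # UPDATE_DISCONNECTED 1 (no re-fetch, no re-patch on reconfigure).
        if src_dir.exists():
            return
        subprocess.run(["git", "clone", repo_url, str(src_dir)], check=True)
        subprocess.run(["git", "checkout", ref], cwd=src_dir, check=True)
        subprocess.run(["git", "apply", str(patch_file)], cwd=src_dir, check=True)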
vllm_flash_attn/
    __init__.py
    bert_padding.py
    flash_attn_interface.py
    flash_attn_triton.py
    flash_attn_triton_og.py
    flash_blocksparse_attention.py
    flash_blocksparse_attn_interface.py
    fused_softmax.py
    layers/
        __init__.py
        patch_embed.py
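Because the vendored package mirrors the upstream module layout file for file, switching a call site between the two is, in principle, only an import-path change. A hedged usage sketch, assuming the vendored copy re-exports the same public functions as upstream flash_attn:

    try:
        # vendored copy shipped with vLLM (layout shown above)
        from vllm_flash_attn.flash_attn_interface import flash_attn_varlen_func
    except ImportError:
        # unmodified upstream package
        from flash_attn.flash_attn_interface import flash_attn_varlen_func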