from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type

import torch

from vllm_flash_attn import flash_attn_varlen_func, flash_attn_with_kvcache

from vllm import _custom_ops as ops
from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,

@@ -18,6 ...
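# A rough sketch (an assumption, not the backend's actual implementation) of
# how the two imported kernels divide the work in a paged-KV attention
# backend: flash_attn_varlen_func serves prefill over packed variable-length
# prompts, while flash_attn_with_kvcache serves single-token decode against
# the paged KV cache via a block table.
def _decode_with_paged_cache(q, key_cache, value_cache, block_table,
                             cache_seqlens, scale):
    # q: (num_seqs, 1, num_heads, head_dim); caches are paged as
    # (num_blocks, block_size, num_kv_heads, head_dim).
    return flash_attn_with_kvcache(
        q,
        key_cache,
        value_cache,
        block_table=block_table,
        cache_seqlens=cache_seqlens,
        softmax_scale=scale,
        causal=True,
    )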
try:
    from flash_attn import flash_attn_func, flash_attn_varlen_func  # type: ignore
    from flash_attn.bert_padding import pad_input, unpad_input  # type: ignore

    is_flash_attn_2_available = True
except ImportError:
    is_flash_attn_2_available = False

logger = logging.get_logger(__name__)
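# A minimal sketch (not from the original source) of how a flag like
# is_flash_attn_2_available is typically consumed: dispatch to the fused
# flash-attn kernel when the import succeeded, otherwise fall back to
# PyTorch's scaled_dot_product_attention. Layouts assumed here:
# (batch, seqlen, num_heads, head_dim) for flash-attn, transposed to
# (batch, num_heads, seqlen, head_dim) for the fallback.
import torch


def _attention_forward(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
                       causal: bool = True) -> torch.Tensor:
    if is_flash_attn_2_available:
        # flash_attn_func expects (batch, seqlen, num_heads, head_dim).
        return flash_attn_func(q, k, v, causal=causal)
    out = torch.nn.functional.scaled_dot_product_attention(
        q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2),
        is_causal=causal)
    return out.transpose(1, 2)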
from vllm_flash_attn import flash_attn_varlen_func as _flash_attn_varlen_func
from vllm_flash_attn import flash_attn_with_kvcache as _flash_attn_with_kvcache

# yapf: disable
from vllm.vllm_flash_attn import (
    flash_attn_varlen_func as _flash_attn_varlen_func)
from vllm.vllm_fla...
from vllm.worker.model_runner import (ModelInputForGPUBuilder,
                                      ModelInputForGPUWithSamplingMetadata)

from vllm_flash_attn import flash_attn_varlen_func as _flash_attn_varlen_func
from vllm_flash_attn import flash_attn_with_kvcache as _flash_attn_with_kvcache

# yapf: disable
from vllm.vllm_flash_...
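# A hedged sketch (not part of the original diff) of a compatibility shim for
# the import rename shown above: newer vLLM wheels vendor the kernels under
# vllm.vllm_flash_attn, while older installs expose a standalone
# vllm_flash_attn package. Trying the new location first keeps callers working
# across both layouts.
try:
    from vllm.vllm_flash_attn import (
        flash_attn_varlen_func as _flash_attn_varlen_func)
    from vllm.vllm_flash_attn import (
        flash_attn_with_kvcache as _flash_attn_with_kvcache)
except ImportError:
    from vllm_flash_attn import (
        flash_attn_varlen_func as _flash_attn_varlen_func)
    from vllm_flash_attn import (
        flash_attn_with_kvcache as _flash_attn_with_kvcache)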