    stop_token_ids = None
    return llm, prompt, stop_token_ids


# LLaVA-NeXT-Video
# Currently only supports video input
def run_llava_next_video(question):
    prompt = f"USER: <video>\n{question} ASSISTANT:"
    llm = LLM(model="llava-hf/LLaVA-NeXT-Video-7B-hf", max_model_len=8192)
    stop_token_ids = None
    return llm, prompt, stop_token_ids
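# A minimal usage sketch, not part of the snippet above: it assumes vLLM's
# offline API, where video frames are passed as a NumPy array under the
# "video" key of multi_modal_data. The random frame array stands in for a
# real clip, and the (frames, height, width, channels) layout is an
# assumption for illustration only.
import numpy as np
from vllm import SamplingParams


def demo_llava_next_video():
    llm, prompt, stop_token_ids = run_llava_next_video("Why is this video funny?")
    sampling_params = SamplingParams(temperature=0.2,
                                     max_tokens=64,
                                     stop_token_ids=stop_token_ids)
    # Random frames as a placeholder clip.
    video_frames = np.random.randint(0, 255, (16, 336, 336, 3), dtype=np.uint8)
    outputs = llm.generate(
        {
            "prompt": prompt,
            "multi_modal_data": {"video": video_frames},
        },
        sampling_params=sampling_params,
    )
    for o in outputs:
        print(o.outputs[0].text)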
    return SequenceData(token_ids)


def dummy_image_for_paligemma(
    hf_config: SiglipVisionConfig,
    *,
    image_width_override: Optional[int] = None,
    image_height_override: Optional[int] = None,
):
    width = height = hf_config.image_size
    if image_width_override is not None:
        ...
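# A self-contained sketch of the same dummy-image idea (an assumption, not a
# copy of the original helper): build a blank RGB image at the resolution the
# vision config expects, honoring optional overrides, so multimodal memory
# profiling can run without real data. The helper name and the example size
# below are made up for illustration.
from typing import Optional

from PIL import Image


def make_dummy_image(image_size: int,
                     width_override: Optional[int] = None,
                     height_override: Optional[int] = None) -> Image.Image:
    width = height = image_size
    if width_override is not None:
        width = width_override
    if height_override is not None:
        height = height_override
    # A zero-filled RGB image is enough for shape/memory profiling.
    return Image.new("RGB", (width, height), color=0)


# Example call; 224 is just an illustrative SigLIP input resolution.
dummy = make_dummy_image(224)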
    return llm, prompt, stop_token_ids


# Molmo
def run_molmo(question, modality):
    assert modality == "image"
    model_name = "allenai/Molmo-7B-D-0924"
    llm = LLM(
        model=model_name,
        trust_remote_code=True,
        dtype="bfloat16",
    )
    prompt = question
    stop_token_ids = None
    return llm, prompt, stop_token_ids
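# A hedged usage sketch, not part of the example above: it assumes vLLM's
# offline API, where a PIL image is passed under the "image" key of
# multi_modal_data. The blank image is a stand-in for a real input file.
from PIL import Image
from vllm import SamplingParams


def demo_molmo():
    llm, prompt, stop_token_ids = run_molmo("Describe this image.", modality="image")
    sampling_params = SamplingParams(temperature=0.2,
                                     max_tokens=64,
                                     stop_token_ids=stop_token_ids)
    image = Image.new("RGB", (640, 480), color=(128, 128, 128))  # placeholder image
    outputs = llm.generate(
        {
            "prompt": prompt,
            "multi_modal_data": {"image": image},
        },
        sampling_params=sampling_params,
    )
    for o in outputs:
        print(o.outputs[0].text)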
    return output_ids, hf_output_str, out_logprobs


def run_test(
    hf_runner: Type[HfRunner],
    vllm_runner: Type[VllmRunner],
    image_assets: _ImageAssets,
    model: str,
    *,
    size_factors: List[float],
    dtype: str,
    max_tokens: int,
    num_logprobs: int,
    tensor_parallel_size: int,
    distributed_...
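# A simplified sketch of the comparison pattern such a harness follows (an
# assumption, not the actual test body): rescale each image by every size
# factor, generate with both backends on identical inputs, and collect any
# pairwise mismatches. The hf_generate/vllm_generate callables are
# hypothetical stand-ins for the HF and vLLM runner calls.
from typing import Callable, List, Tuple

from PIL import Image


def compare_backends(images: List[Image.Image],
                     prompts: List[str],
                     size_factors: List[float],
                     hf_generate: Callable[[List[str], List[Image.Image]], List[str]],
                     vllm_generate: Callable[[List[str], List[Image.Image]], List[str]],
                     ) -> List[Tuple[float, int, str, str]]:
    mismatches = []
    for factor in size_factors:
        # Guard against zero-sized images when a factor of 0.0 is used.
        scaled = [
            img.resize((max(int(img.width * factor), 1),
                        max(int(img.height * factor), 1)))
            for img in images
        ]
        hf_outputs = hf_generate(prompts, scaled)
        vllm_outputs = vllm_generate(prompts, scaled)
        for i, (hf_out, vllm_out) in enumerate(zip(hf_outputs, vllm_outputs)):
            if hf_out != vllm_out:
                mismatches.append((factor, i, hf_out, vllm_out))
    return mismatches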