packages/vllm/engine/async_llm_engine.py", line 191, in step_async output = await self._run_workers_async( File "/home/user/projects/repos/transformers/.venv/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 227, in _run_workers_async assert output == other_output ...
all_outputs.append(output) if self.parallel_config.worker_use_ray: all_outputs = ray.get(all_outputs) if get_all_outputs: return all_outputs # Make sure all workers have the same results. output = all_outputs[0] for other_output in all_outputs[1:]: assert output == other_output retu...
packages/vllm-0.2.1-py3.10-linux-x86_64.egg/vllm/engine/llm_engine.py", line 562, in step output = self._run_workers( File "/opt/conda/envs/vllm/lib/python3.10/site-packages/vllm-0.2.1-py3.10-linux-x86_64.egg/vllm/engine/llm_engine.py", line 712, in _run_workers assert output == other_output...
output_proc_callback = None if frozen_model_input.async_callback is not None: output_proc_callback = frozen_model_input.async_callback assert output_proc_callback is not None async_callback = functools.partial(self._async_process_outputs, model_input=model_input, output_proc_callback=output_proc_callback) 这里会指向: ...
将采样结果保存到 output inference pipeline 2. 整体核心模块 上图给出了 vLLM 核心模块之间的结构关系。接下来我们从简单的模块(即输入、采样和输出)开始介绍,最后详细介绍 LLM 模块。 3. Sequence 如上图我们可以看到 vLLM 为输入的句子设计了很多子模块,这些模块的用处各不相同,但是彼此之间有关系,下面...
{"metadata": {"workload_type": "token-norm-dist", "input_mean": 2048, "input_stdev": 0, "output_mean": 128, "output_stdev": 0, "num_requests": 512, "tokenize_vocabsize": 92544, "max_input_len": 2048, "max_output_len": 128, "workload_name": "workload_type:token-norm-di...
assert CUDA_HOME is not None, "CUDA_HOME is not set" nvcc_output = subprocess.check_output([CUDA_HOME + "/bin/nvcc", "-V"], universal_newlines=True) output = nvcc_output.split() release_idx = output.index("release") + 1 nvcc_cuda_version = parse(output[release_idx].split...
"/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py"
"/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py"
assert CUDA_HOME is not None, "CUDA_HOME is not set" nvcc_output = subprocess.check_output([CUDA_HOME + "/bin/nvcc", "-V"], universal_newlines=True) output = nvcc_output.split() release_idx = output.index("release") + 1 nvcc_cuda_version = parse(output[release_idx].split...