# Change max_position_embeddings=32768 to max_position_embeddings=131072. Step 02: start the model: (vllm043) ailearn@gpts:~$ cd /data/sdb/models/ ; CUDA_VISIBLE_DEVICES=0,1,2,3 python -m vllm.entrypoints.openai.api_server --host 0.0.0.0 --port 8000 --max-model-len 131072 --model Qwen2-72B...
'max_position_embeddings': 8192, 'bf16': False, 'fp16': False, 'fp32': True, 'kv_channels': 128, 'rotary_pct': 1.0, 'rotary_emb_base': 10000, 'use_dynamic_ntk': True, 'use_logn_attn': True, 'use_flash_attn': False, 'no_bias': True, 'use_cache_quantization': False, ...
_embeddings(self): return self.wte def set_input_embeddings(self, new_embeddings): self.wte = new_embeddings def get_ntk_alpha(self, true_seq_len): context_value = math.log(true_seq_len / self.seq_length, 2) + 1 ntk_alpha = 2 ** math.ceil(context_value) - 1 ntk_alpha = max...