input_ids = tokenizer(inputs, return_tensors="pt").input_ids.to(device)
generate_ids = model.generate(
    input_ids,
    max_new_tokens=1024,
    do_sample=True,
    top_p=0.85,
    temperature=1.0,
    repetition_penalty=1.0,
    eos_token_id=2,
    bos_token_id=1,
    pad_token_id=0,
)
output = tokenizer...
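The snippet truncates at the decode step. A minimal sketch of the usual continuation, assuming the standard transformers tokenizer API and that only the newly generated tokens (past the prompt) are wanted:

# Drop the prompt tokens, then decode only the newly generated ones.
output = tokenizer.batch_decode(
    generate_ids[:, input_ids.shape[1]:],
    skip_special_tokens=True,
)[0]
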
outputs = model.generate(  # feed the inputs to the model and generate
    input_ids=input_ids,
    max_new_tokens=512,
    do_sample=True,
    top_p=0.9,
    temperature=0.5,
    repetition_penalty=1.1,
    eos_token_id=tokenizer.encode('<|eot_id|>')[0],
)
outputs = outputs.tolist()[0][len(input_ids[0]):]
response = tokenizer.decode(outputs)
...
with torch.no_grad():
    for generation_output in self.model.stream_generate(
        input_ids.cuda(),
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=top_p,
        temperature=temperature,
        repetition_penalty=1.0,
        eos_token_id=2,
        bos_token_id=1,
        pad_token_id=0,
    ):
        s = generation_output[0][prompt_...
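stream_generate is a model-specific streaming helper, not stock transformers API; the truncated loop body above apparently slices off the prompt before decoding. A minimal sketch of that body, assuming each generation_output holds the full sequence so far and a hypothetical prompt_length variable holding the prompt's token count:

# Hypothetical loop body: re-decode the part past the prompt on every step.
s = generation_output[0][prompt_length:]
print(tokenizer.decode(s, skip_special_tokens=True), end="\r", flush=True)
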
model_inputs = tokenizer([text], return_tensors="pt").to('cuda')
generated_ids = model.generate(
    model_inputs.input_ids,
    max_new_tokens=512,
    do_sample=True,
    top_p=0.9,
    temperature=0.5,
    repetition_penalty=1.1,
    eos_token_id=tokenizer.encode('<|eot_id|>')[0],
)
generated_ids = ...
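A sketch of the usual post-processing for this call, assuming the goal is to strip the echoed prompt from each returned sequence before decoding:

# Keep only the tokens produced after each input sequence, then decode.
generated_ids = [
    out[len(inp):] for inp, out in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
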
tokens":512,# max is 4096"do_sample":False,"top_p":1,"temperature":0.1,"repetition_penalty...
q = generate_prompt(s)
inputs = tokenizer(q, return_tensors="pt")
inputs = inputs.to(device=device)
generate_ids = ref_model.generate(
    **inputs,
    max_new_tokens=120,
    do_sample=True,
    top_p=0.85,
    temperature=1.0,
    repetition_penalty=1.0,
    ...
CUDA_VISIBLE_DEVICES=0 \
swift infer \
    --ckpt_dir "output/llama3-8b-instruct/vx-xxx/checkpoint-xxx" \
    --load_dataset_config true \
    --use_flash_attn true \
    --max_new_tokens 2048 \
    --temperature 0.1 \
    --top_p 0.7 \
    --repetition_penalty 1. \
    --do_sample true \
    --merge_lora false \
    ...
input_ids = tokenizer(['Human: 介绍一下中国\nAssistant: '],  # prompt: "Introduce China"
                      return_tensors="pt",
                      add_special_tokens=False).input_ids.to('cuda')
generate_input = {
    "input_ids": input_ids,
    "max_new_tokens": 512,
    "do_sample": True,
    "top_k": 50,
    "top_p": 0.95,
    "temperature": 0.3,
    "repetition_penalty": 1.3,
    "eos_token_id": tokenizer.eos_token_id,
    "bos_token_id": tokenizer.bos_token_id,
    "pad_token_id": tokenizer.pad_token_id,
}
generate_...
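The trailing generate_... presumably unpacks this kwargs dict into model.generate; a minimal sketch under that assumption:

# Unpack the kwargs dict, then decode only the tokens past the prompt.
generate_ids = model.generate(**generate_input)
text = tokenizer.decode(generate_ids[0][len(input_ids[0]):],
                        skip_special_tokens=True)
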
{
    'max_length': 512,
    'max_new_tokens': 512,
    'num_beams': 1,
    'do_sample': True,
    'use_past': True,
    'temperature': 0.9,
    'top_k': 5,
    'top_p': 0.9,
    'repetition_penalty': 1.1,
    'encoder_repetition_penalty': 1.0,
    'renormalize_logits': True,
    'pad_token_id': 0,
    'bos_token_id...
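The use_past key points to a MindSpore-style config rather than stock transformers, where the closest equivalent flag is use_cache. A hedged sketch of carrying such a dict through transformers' GenerationConfig, after dropping the framework-specific key:

from transformers import GenerationConfig

# Assumed dict name gen_kwargs; framework-specific keys removed first.
gen_kwargs = {
    'max_new_tokens': 512, 'num_beams': 1, 'do_sample': True,
    'temperature': 0.9, 'top_k': 5, 'top_p': 0.9,
    'repetition_penalty': 1.1, 'encoder_repetition_penalty': 1.0,
    'renormalize_logits': True, 'pad_token_id': 0,
}
gen_config = GenerationConfig(**gen_kwargs)
# generate_ids = model.generate(input_ids, generation_config=gen_config)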