# Put the GPU in a clean, quiescent state before the measured generation:
# clear the peak-memory counters for this model's device, release cached
# allocator blocks, and drain any in-flight kernels so that `start_event`
# marks the beginning of the generation work only.
torch.cuda.reset_peak_memory_stats(model.device)
torch.cuda.empty_cache()
torch.cuda.synchronize()
start_event.record()

# Greedy decoding (do_sample=False), capped at 512 new tokens; output is
# emitted incrementally through `streamer` while the full result is kept
# in `generation_output` for later inspection.
# NOTE(review): assumes `input_ids` is the tokenizer's encoding dict
# (input_ids / attention_mask) being splatted as kwargs — confirm at call site.
generation_output = model.generate(
    **input_ids,
    do_sample=False,
    max_new_tokens=512,
    streamer=streamer,
)