attention_mask=None): # each engine has hidden_dim/TP, don't forget to multiply TP hidden_size = self.encoder_model_config.hidden_size * self.encoder_runtime_mapping.tp_size if input_ids.dim() == 1: hidden_states_shape = (input_ids.shape[0], hidden_size ...
attention_mask=None, time_encoder=False, ): ## ensure all externally provided tensors are on the correct device.encoder_input_ids = encoder_input_ids.to(self.device) decoder_input_ids = decoder_input_ids.to(self.device) if attention_mask is not None: attention_mask = torch.tensor...