```python
# and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
head_mask = self.get_head_mask(head_mask, len(self.config.depths))

# patch-embed the pixel values; input_dimensions carries the resulting spatial grid
embedding_output, input_dimensions = self.embeddings(pixel_values, bool_masked_pos=bool_masked_pos)

encoder_outputs = self.encoder(embedding_output, input_dimensions, head_mask=head_mask)
```
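The fragment above looks like part of a Swin-style `forward` in Hugging Face Transformers, where `get_head_mask` broadcasts a per-head mask up to the five-dimensional shape named in the comment. Below is a minimal, self-contained sketch of that broadcasting, assuming a plain tensor input; `expand_head_mask` is a hypothetical stand-in for the library helper, not its actual implementation.

```python
import torch

def expand_head_mask(head_mask: torch.Tensor, num_hidden_layers: int) -> torch.Tensor:
    """Broadcast a head mask toward [num_hidden_layers, batch, num_heads, seq_len, seq_len].

    Accepts a 1-D mask of shape [num_heads] (shared across layers) or a
    2-D mask of shape [num_layers, num_heads]; the size-1 dims broadcast
    against the attention scores at each layer.
    """
    if head_mask.dim() == 1:
        # [num_heads] -> [1, 1, num_heads, 1, 1], repeated for every layer
        head_mask = head_mask[None, None, :, None, None]
        head_mask = head_mask.expand(num_hidden_layers, -1, -1, -1, -1)
    elif head_mask.dim() == 2:
        # [num_layers, num_heads] -> [num_layers, 1, num_heads, 1, 1]
        head_mask = head_mask[:, None, :, None, None]
    return head_mask

# e.g. keep all 4 heads of a 2-layer model (1.0 = keep, 0.0 = mask):
mask = expand_head_mask(torch.ones(4), num_hidden_layers=2)
print(mask.shape)  # torch.Size([2, 1, 4, 1, 1])
```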
```text
add_position_embedding .......................... True
add_qkv_bias .................................... False
add_rmsnorm_offset .............................. False
adlr_autoresume ................................. False
adlr_autoresume_interval ........................ 1000
apply_layernorm_1p .............................. False
apply_query_key_layer_scaling ...................
```
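These lines read like the argument dump Megatron-LM prints at startup, one flag per line padded with dots to its value (the final entry's value is cut off in the original). As a hedged sketch of how such a dump can be produced, here is a small `argparse` setup using only the flags visible above; the defaults mirror the printed values, but the declarations are illustrative, not Megatron's actual parser.

```python
import argparse

parser = argparse.ArgumentParser(description="sketch of a Megatron-style argument dump")
parser.add_argument("--add-position-embedding", action="store_true", default=True)
parser.add_argument("--add-qkv-bias", action="store_true", default=False)
parser.add_argument("--add-rmsnorm-offset", action="store_true", default=False)
parser.add_argument("--adlr-autoresume", action="store_true", default=False)
parser.add_argument("--adlr-autoresume-interval", type=int, default=1000)
parser.add_argument("--apply-layernorm-1p", action="store_true", default=False)

args = parser.parse_args([])

# echo each argument padded with dots, mimicking the log format above
for name, value in sorted(vars(args).items()):
    print(f"{name} {'.' * (40 - len(name))} {value}")
```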
```text
[rnn_mode, rnn_input_mode, rnn_direction_mode]: 2, 0, 0
[num_layers, input_size, num_units, dir_count, max_seq_length, batch_size, cell_num_units]: [1, 32, 32, 1, 5, 12928
```
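This looks like a cuDNN RNN descriptor dump (the trailing dimension list is truncated in the original). In cuDNN's enums, `rnn_mode = 2` is `CUDNN_LSTM`, `rnn_input_mode = 0` is `CUDNN_LINEAR_INPUT`, and `rnn_direction_mode = 0` is `CUDNN_UNIDIRECTIONAL`. A hedged sketch of the equivalent configuration in PyTorch (which dispatches to cuDNN on GPU), using the dimensions that survive the cut; the batch size of 4 is an assumption, since the original value is unreadable.

```python
import torch
import torch.nn as nn

# rnn_mode=2 (LSTM), dir_count=1 (unidirectional), per the descriptor above
rnn = nn.LSTM(
    input_size=32,    # input_size
    hidden_size=32,   # num_units
    num_layers=1,     # num_layers
    bidirectional=False,
)

# input is [max_seq_length=5, batch (assumed 4), input_size=32]
seq = torch.randn(5, 4, 32)
output, (h_n, c_n) = rnn(seq)
print(output.shape)  # torch.Size([5, 4, 32])
```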