self).__init__()self.self=MultiHeadAttentionLayer(config)# 这里是左下的那个 Add & Normself.output=BertAddNorm(config.hidden_size,config.hidden_size,config.hidden_dropout_prob,config.layer_norm_eps)defforward(self,input_tensor,attention_mask=None,head_mask=None):self_outputs=self.self(input_t...