简单手撕代码:

from math import sqrt
import torch
from torch import nn

class Self_Attention(nn.Module):
    def __init__(self, input_dim, k_dim, v_dim):
        super(Self_Attention, self).__init__()
        self.q = nn.Linear(input_dim, k_dim)
        self.k = nn.Linear(input_dim, k_dim)
        self.v = nn.Linear(input_dim, v_dim)
        self.softmax = ...