target = self.patchify(imgs) if self.norm_pix_loss: mean = target.mean(dim=-1, keepdim=True) var = target.var(dim=-1, keepdim=True) target = (target - mean) / (var + 1.e-6)**.5 loss = (pred - target)**2 loss = loss.mean(dim=-1) # [N, L], mean loss per patc...
mlp_ratio=4., norm_layer=nn.LayerNorm, norm_pix_loss=False): super().__init__() # --- # MAE encoder specifics 向量化 self.patch_embed = PatchEmbed(img_size, patch_size, in_chans, embed_dim) num_patches = self.patch_embed.num_patches #vit 的cls_token位置,大小与向量化后的维度相...
target = self.patchify(imgs) if self.norm_pix_loss: mean = target.mean(dim=-1, keepdim=True) var = target.var(dim=-1, keepdim=True) target = (target - mean) / (var + 1.e-6)**.5 loss = (pred - target)**2 loss = loss.mean(dim=-1) # [N, L], mean loss per patc...
target = self.patchify(imgs) if self.norm_pix_loss: mean = target.mean(dim=-1, keepdim=True) var = target.var(dim=-1, keepdim=True) target = (target - mean) / (var + 1.e-6)**.5 loss = (pred - target)**2 loss = loss.mean(dim=-1) # [N, L], mean loss per patc...
self.norm_pix_loss = norm_pix_loss self.initialize_weights() 第一个(norm_pix_loss)的值是 False,稍后再看它有什么用;第二个(initialize_weights)是一个函数,我们进去看看。 pos_embed = get_2d_sincos_pos_embed(self.pos_embed.shape[-1],int(self.patch_embed.num_patches**.5), cls_token=True) ...
self.norm_pix_loss = norm_pix_loss self.initialize_weights() def patchify(self, imgs): ''' imgs: (N, 3, H, W) x: (N, L, patch_size**2 *3) ''' p = self.patch_embed.patch_size[0] assert imgs.shape[2] == imgs.shape[3] and imgs.shape[2] % p == 0 h = w ...
"""target =self.patchify(imgs)ifself.norm_pix_loss: mean = target.mean(dim=-1, keepdim=True) var = target.var(dim=-1, keepdim=True) target = (target - mean) / (var +1.e-6)**.5loss = (pred - target)**2loss = loss.mean(dim=-1)# [N, L], mean loss per patchloss...
"""target = self.patchify(imgs)ifself.norm_pix_loss: mean = target.mean(dim=-1, keepdim=True) var = target.var(dim=-1, keepdim=True) target = (target - mean) / (var +1.e-6)**.5loss = (pred - target)**2loss = loss.mean(dim=-1)# [N, L], mean loss per patchloss...
if self.norm_pix_loss: mean = target.mean(axis=-1, keepdim=True) var = target.var(axis=-1, keepdim=True) target = (target - mean) / (var + 1.e-6)**.5 loss = (pred - target) ** 2 loss = loss.mean(axis=-1) # [N, L], mean loss per patch loss = (loss * mask)...
if self.norm_pix_loss: mean = target.mean(dim=-1, keepdim=True) var = target.var(dim=-1, keepdim=True) target = (target - mean) / (var + 1.e-6)**.5 loss = (pred - target)**2 loss = loss.mean(dim=-1) # [N, L], mean loss per patch ...