out_channels,num_res_blocks,attention_resolutions,dropout=0,channel_mult=(1,2,4,8),conv_resample=True,dims=2,num_classes=None,use_checkpoint=False,use_fp16=False,num_heads=-1,num_head_channels=-1,num_heads_upsample=-1,use_scale_shift_norm=False,resblock_updown=False,use_new_attention_...
CONV,即初始卷积 最开始有一个基础通道数model\_channels=320,以及通道数倍数channel\_mult=[1,2,4,4]。后者控制每经过一个第一种编码块,通道数翻的倍数,其长度也决定了有多少个第一种编码块。 前两种类型的编码块是交替堆叠的,根据默认的配置文件,一般是n=2个第一种类型的编码块和 1 个第二种类型的编码...
for level, mult in enumerate(channel_mult): for _ in range(num_res_blocks): layers = [ ResBlock( ch, time_embed_dim, dropout, out_channels=mult * model_channels, dims=dims, use_checkpoint=use_checkpoint, use_scale_shift_norm=use_scale_shift_norm, ) ] 1. 2. 3. 4. 5. 6. 7....
input_channel = _make_divisible(input_channel * width_mult, round_nearest) self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) features = [ConvBNReLU(3, input_channel, stride=2)] for t, c, n, s in inverted_residual_setting: output_channel = _mak...
213 + if level != len(channel_mult) - 1: 214 + self.input_blocks.append( 215 + TimestepEmbedSequential(Downsample(ch, use_conv=True)) 216 + ) 217 + input_block_chans.append(ch) 218 + ds *= 2 178 219 179 220 self.middle_block = TimestepEmbedSequential( 180 221 Res...
'num_res_blocks': 2, 'attention_resolutions': [2, 4], 'transformer_depth': [0, 4, 4, 0], 'channel_mult': [1, 2, 4, 4], 1098 + 'transformer_depth_middle': 4, 'use_linear_in_transformer': True, 'context_dim': 1280} 1099 + 1100 + SD21 = {'use_checkpoint': False,...
channel_mult=(1, 2, 4, 8), conv_resample=True, dims=2, num_classes=None, num_heads=-1, num_head_channels=-1, num_heads_upsample=-1, use_scale_shift_norm=False, resblock_updown=False, transformer_depth=1, context_dim=None, n_embed=None, num_attention_blocks=No...
last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) features = [ConvBNReLU(3, input_channel, stride=2)] current_stride *= 2 dilation=1 previous_dilation = 1 # building inverted residual blocks for t, c, n, s in inverted_residual_setting: output_channel...
for level, mult in enumerate(channel_mult): for nr in range(self.num_res_blocks[level]): layers = [ResBlock(ch,time_embed_dim,dropout, out_channels=mult * model_channels, dims=dims,use_checkpoint=use_checkpoint, use_scale_shift_norm=use_scale_shift_norm,)] ch = mult * model_...
channel_mult_noise=1, Expand All@@ -203,6 +206,7 @@ def main(cfg: DictConfig) -> None: embedding_type="fourier", encoder_type="residual", decoder_type="standard", checkpoint_level=songunet_checkpoint_level, ) c.network_kwargs.update( ...