quant_type=='nf4': self.weight.data, self.qstate= bnb.functional.quantize_nf4((self.weight.data).to('cuda')) elif self.quant_type=='fp4': self.weight.data, self.qstate= bnb.functional.quantize_fp4((self.weight.data).to('cuda')) def forward(self, x): if self.is_quant: if ...