# NOTE(review): this chunk begins mid-class — the `class Net8Bit` header and the
# opening of the nn.Sequential(...) that the parenthesis below closes are outside
# this view; the indentation here is reconstructed from the collapsed source line.
            Linear8bitLt(64, 10, has_fp16_weights=False)  # final layer: 64 -> 10 class scores; 8-bit inference weights (has_fp16_weights=False)
        )

    def forward(self, x):
        """Flatten `x`, run it through the 8-bit quantized stack, and return
        per-class log-probabilities (log_softmax over dim=1).

        # assumes x is (batch, 1, 28, 28) MNIST images — TODO confirm against caller
        """
        x = self.flatten(x)
        x = self.model(x)
        return F.log_softmax(x, dim=1)


device = torch.device("cuda")  # NOTE(review): hard-coded CUDA — fails on CPU-only hosts; bitsandbytes 8-bit layers require a GPU anyway

# Load
# Rebuild the architecture, load the trained weights, and report model size.
model = Net8Bit()
model.load_state_dict(torch.load("mnist_model.pt"))  # NOTE(review): no map_location given — assumes the checkpoint's device matches; verify
get_size_kb(model)  # defined elsewhere in the file — presumably reports the model's size in KB
...