```python
from typing import Iterable

import torch
from torch.utils.data import DataLoader

from ppq import BaseGraph, QuantizationSettingFactory, TargetPlatform
from ppq.api import export_ppq_graph, quantize_onnx_model

BATCHSIZE = 32
INPUT_SHAPE = [3, 224, 224]
DEVICE = 'cuda'  # only cuda is fully ...
```
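The snippet above is cut off; for context, here is a minimal sketch of how PPQ's quantization flow typically continues, using its public `quantize_onnx_model` / `export_ppq_graph` API. The random calibration data, step count, and file paths are illustrative assumptions, not the exact sample code:

```python
# Calibration data; random tensors stand in for real preprocessed samples
def load_calibration_dataset() -> Iterable:
    return [torch.rand(size=INPUT_SHAPE) for _ in range(32)]

def collate_fn(batch: torch.Tensor) -> torch.Tensor:
    return batch.to(DEVICE)

calibration_dataloader = DataLoader(
    dataset=load_calibration_dataset(),
    batch_size=BATCHSIZE, shuffle=True)

# Quantize with a default setting, then export the graph + quantization config
quant_setting = QuantizationSettingFactory.default_setting()
quantized: BaseGraph = quantize_onnx_model(
    onnx_import_file='model.onnx',  # hypothetical input path
    calib_dataloader=calibration_dataloader,
    calib_steps=32,
    input_shape=[BATCHSIZE] + INPUT_SHAPE,
    setting=quant_setting,
    collate_fn=collate_fn,
    platform=TargetPlatform.ONNXRUNTIME,
    device=DEVICE)

export_ppq_graph(
    graph=quantized,
    platform=TargetPlatform.ONNXRUNTIME,
    graph_save_to='quantized.onnx',
    config_save_to='quantized.json')
```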
```python
import onnx
from PIL import Image

import ktc  # Kneron toolchain API (missing from the original snippet)

onnx_path = '/docker_mount/latest.onnx'
m = onnx.load(onnx_path)
m = ktc.onnx_optimizer.onnx2onnx_flow(m)
onnx.save(m, 'latest.opt.onnx')

# npu (only) performance simulation
km = ktc.ModelConfig(32769, "0001", "630", onnx_model=m)
eval_result = km.evaluate()
```
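To also sanity-check the optimized model numerically, the toolchain's software simulator can run the .onnx directly; a minimal sketch, assuming `ktc.kneron_inference`, an input tensor named 'input', and simple resize/normalize preprocessing (all illustrative assumptions):

```python
import numpy as np
from PIL import Image

# Hypothetical preprocessing: resize to the model input size, scale to [0, 1]
img = Image.open('/docker_mount/sample.jpg').resize((224, 224))
x = np.expand_dims(np.asarray(img, dtype=np.float32) / 255.0, axis=0)

# Run the optimized model through the toolchain's E2E simulator
out = ktc.kneron_inference([x], onnx_file='latest.opt.onnx', input_names=['input'])
print([o.shape for o in out])
```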
Example: quantizing an ONNX model (float32 to int8)

```python
import onnx
from onnxruntime.quantization import quantize_dynamic, QuantType

# Paths for the exported ONNX model and its quantized output
model_path = "matmul_model.onnx"
quantized_model_path = "matmul_model_quantized.onnx"

# Apply dynamic quantization to the model
quantize_dynamic(
    model_path,
    quantized_model_path,
    weight_type=QuantType.QInt8)
```
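To verify the result, you can compare file sizes and outputs of the two models with onnxruntime; a minimal sketch (the random test input is an assumption; a real check would use representative data):

```python
import os

import numpy as np
import onnxruntime as ort

print("fp32 size:", os.path.getsize("matmul_model.onnx"))
print("int8 size:", os.path.getsize("matmul_model_quantized.onnx"))

# Run both models on the same random input and compare their outputs
sess_fp32 = ort.InferenceSession("matmul_model.onnx",
                                 providers=["CPUExecutionProvider"])
sess_int8 = ort.InferenceSession("matmul_model_quantized.onnx",
                                 providers=["CPUExecutionProvider"])

inp = sess_fp32.get_inputs()[0]
shape = [d if isinstance(d, int) else 1 for d in inp.shape]  # fill dynamic dims
x = np.random.rand(*shape).astype(np.float32)

y_fp32 = sess_fp32.run(None, {inp.name: x})[0]
y_int8 = sess_int8.run(None, {inp.name: x})[0]
print("max abs diff:", np.abs(y_fp32 - y_int8).max())
```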
If PLATFORM = TargetPlatform.QNN_DSP_INT8, quantize_torch_model.py exports a .json and a .onnx (which looks identical to the original fp32 model); if PLATFORM = TargetPlatform.ONNXRUNTIME, it produces a .json and a .onnx (smaller than the original, and it is a QDQ model), as shown in the figure. My question: how can I generate a QNN model, for example the way Intel's neural-compressor…
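For reference, a minimal sketch of what quantizing the same ONNX model through Intel's neural-compressor (2.x API) can look like; the dynamic approach and file names are illustrative assumptions, not a confirmed answer to the QNN export question:

```python
from neural_compressor import PostTrainingQuantConfig, quantization

# Post-training dynamic quantization of an ONNX model (no calibration data needed)
conf = PostTrainingQuantConfig(approach="dynamic")
q_model = quantization.fit(model="matmul_model.onnx", conf=conf)

# Save the quantized model; static configs also accept quant_format="QDQ"
q_model.save("matmul_model_inc.onnx")
```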