Skip to content

Commit 7ce0156

Browse files
committed
Update classifier.py
1 parent 4ff042f commit 7ce0156

1 file changed

Lines changed: 7 additions & 4 deletions

File tree

src/adaptive_classifier/classifier.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -635,6 +635,7 @@ def _from_pretrained(
635635
local_files_only: bool = False,
636636
token: Optional[Union[str, bool]] = None,
637637
use_onnx: Optional[Union[bool, str]] = "auto",
638+
prefer_quantized: bool = True,
638639
**kwargs
639640
) -> "AdaptiveClassifier":
640641
"""Load a model from the HuggingFace Hub or local directory.
@@ -1012,6 +1013,10 @@ def export_onnx(
10121013
export=True
10131014
)
10141015

1016+
# Always save unquantized version first
1017+
ort_model.save_pretrained(save_directory)
1018+
logger.info(f"Saved unquantized ONNX model to {save_directory}")
1019+
10151020
if quantize:
10161021
logger.info(f"Applying {quantization_config} INT8 quantization...")
10171022

@@ -1026,15 +1031,13 @@ def export_onnx(
10261031
logger.warning(f"Unknown quantization config: {quantization_config}. Using arm64.")
10271032
qconfig = AutoQuantizationConfig.arm64(is_static=False, per_channel=False)
10281033

1029-
# Apply quantization
1034+
# Apply quantization (saves quantized version alongside unquantized)
10301035
quantizer = ORTQuantizer.from_pretrained(ort_model)
10311036
quantizer.quantize(
10321037
save_dir=save_directory,
10331038
quantization_config=qconfig
10341039
)
1035-
else:
1036-
# Save without quantization
1037-
ort_model.save_pretrained(save_directory)
1040+
logger.info(f"Saved quantized ONNX model to {save_directory}")
10381041

10391042
logger.info(f"ONNX model exported to {save_directory}")
10401043
return save_directory

0 commit comments

Comments
 (0)