|
24 | 24 | PerTensor,
|
25 | 25 | PerToken,
|
26 | 26 | )
|
| 27 | +from torchao.kernel import ( |
| 28 | + safe_int_mm, |
| 29 | + int_scaled_matmul, |
| 30 | +) |
27 | 31 | from .linear_activation_quantized_tensor import (
|
28 | 32 | LinearActivationQuantizedTensor,
|
29 | 33 | to_linear_activation_quantized,
|
|
70 | 74 | compute_error,
|
71 | 75 | )
|
72 | 76 | from .weight_only import WeightOnlyInt8QuantLinear
|
| 77 | +from .linear_activation_weight_observed_tensor import ( |
| 78 | + to_linear_activation_weight_observed, |
| 79 | +) |
73 | 80 |
|
# Public API of the quantization package. Grouped by audience: auto/manual
# top-level entry points first, then lower-level building blocks, primitive
# ops, kernels, and supporting types. Keep one quoted name per line WITH a
# trailing comma — a missing comma silently fuses adjacent strings via
# implicit concatenation, corrupting the exported name list.
__all__ = [
    # top level API - auto
    "autoquant",
    "DEFAULT_AUTOQUANT_CLASS_LIST",
    "DEFAULT_INT4_AUTOQUANT_CLASS_LIST",
    "OTHER_AUTOQUANT_CLASS_LIST",

    # top level API - manual
    "quantize_",
    "int8_dynamic_activation_int4_weight",
    "int8_dynamic_activation_int8_weight",
    "int8_dynamic_activation_int8_semi_sparse_weight",
    "int4_weight_only",
    "int8_weight_only",
    "float8_weight_only",
    "float8_dynamic_activation_float8_weight",
    # NOTE: trailing comma restored — without it this entry fused with
    # "uintx_weight_only" into one bogus name via implicit concatenation.
    "float8_static_activation_float8_weight",
    "uintx_weight_only",
    "fpx_weight_only",

    # smooth quant - subject to change
    # NOTE: trailing comma restored — previously fused with "get_scale".
    "swap_conv2d_1x1_to_linear",
    "get_scale",
    "SmoothFakeDynQuantMixin",
    "SmoothFakeDynamicallyQuantizedLinear",
    "swap_linear_with_smooth_fq_linear",
    "smooth_fq_linear_to_inference",
    "set_smooth_fq_attribute",
    "compute_error",

    # building blocks
    "to_linear_activation_quantized",
    # imported at the top of this module but previously missing from __all__
    "to_linear_activation_weight_observed",
    "to_weight_tensor_with_linear_activation_scale_metadata",
    "AffineQuantizedMinMaxObserver",
    "AffineQuantizedObserverBase",

    # quant primitive ops
    # NOTE: fixed typo "choose_qprams_affine" -> "choose_qparams_affine";
    # the misspelled name matched no module attribute.
    "choose_qparams_affine",
    "choose_qparams_affine_with_min_max",
    "choose_qparams_affine_floatx",
    "quantize_affine",
    "quantize_affine_floatx",
    "dequantize_affine",
    "dequantize_affine_floatx",
    "choose_qparams_and_quantize_affine_hqq",
    "fake_quantize_affine",
    "fake_quantize_affine_cachemask",

    # operators/kernels
    "safe_int_mm",
    "int_scaled_matmul",

    # dataclasses and types
    "MappingType",
    "ZeroPointDomain",
    "TorchAODType",
    "PerTensor",
    "PerAxis",
    "PerGroup",
    "PerRow",
    "PerToken",

    "LinearActivationQuantizedTensor",
    "Int4WeightOnlyGPTQQuantizer",
    "Int4WeightOnlyQuantizer",
    "Int8DynActInt4WeightGPTQQuantizer",
    "Int8DynActInt4WeightQuantizer",
    "Int8DynActInt4WeightLinear",
    "WeightOnlyInt8QuantLinear",
    "TwoStepQuantizer",
    "Quantizer",
]
|
0 commit comments