Skip to content

Commit 165c909

Browse files
committed
improvement v3-5-f
parameterize on a scale of higher min(bpw) or lower exp. error
1 parent bb120fc commit 165c909

File tree

1 file changed

+76
-15
lines changed

1 file changed

+76
-15
lines changed

exllamav2/exllamav2_ext/ext_quant.cpp

+76-15
Original file line numberDiff line numberDiff line change
@@ -173,21 +173,82 @@ std::tuple<std::vector<std::tuple<uint64_t, float>>, std::vector<int>, float, ui
173173
float norm
174174
)
175175
{
176-
// --- Enhanced Parameters ---
177-
const int redistribution_iterations = 50;
178-
const float bpw_penalty_scale = 0.6f; // Stronger penalty for low BPW
179-
const float min_bpw_base = 3.3f; // Higher base minimum BPW, we want higher bpw
180-
const int opportunistic_iterations = 30000;
181-
const float initial_opportunistic_temp = 0.12f;
182-
const float low_error_threshold = 0.002f;
183-
const float error_floor = 0.0005f;
184-
const float targeted_redistribution_bpw_threshold = 3.6f;
185-
const float targeted_redistribution_max_err_increase = 1.5f; // Increased tolerance for error increase in targeted redistribution
186-
const float high_bpw_donor_threshold = 5.5f;
187-
const int num_options_to_explore_per_layer = 8;
188-
const int bpw_smoothing_passes = 8;
189-
const float bpw_smoothing_threshold = 0.75f;
190-
const float bpw_balance_factor = 1.8f; // Control trade-off between BPW uniformity and error
176+
// --- Mode-Specific Parameters ---
177+
enum Mode { MODE_BALANCED, MODE_UNIFORM, MODE_AGGRESSIVE, MODE_3_5_2, MODE_3_5_6, MODE_CUSTOM };
178+
// --- Mode Selection ---
179+
Mode mode = MODE_3_5_2; // Default mode, Can be changed into other mode or MODE_CUSTOM
180+
181+
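// Parameter set for a single mode. Field order matters: every parameter set below is a positional brace initializer, so reordering these fields silently reassigns the values.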
182+
struct ModeParams {
183+
float bpw_penalty_scale;
184+
float min_bpw_base;
185+
float opportunistic_temp;
186+
float error_floor;
187+
float targeted_redistribution_max_err_increase;
188+
float high_bpw_donor_threshold;
189+
float bpw_balance_factor;
190+
float low_error_threshold;
191+
int redistribution_iterations;
192+
int opportunistic_iterations;
193+
int num_options_to_explore_per_layer;
194+
int bpw_smoothing_passes;
195+
float bpw_smoothing_threshold;
196+
float targeted_redistribution_bpw_threshold;
197+
};
198+
199+
// Parameter sets, one per mode, listed in the same order as the Mode enum (the table is indexed by the enum value)
200+
const std::vector<ModeParams> mode_params = {
201+
// MODE_BALANCED: Balanced trade-off between BPW uniformity and error
202+
{0.6f, 3.2f, 0.1f, 0.0001f, 1.3f, 5.5f, 1.5f, 0.001f, 60, 30000, 8, 8, 0.75f, 3.5f},
203+
204+
// MODE_UNIFORM: Strong emphasis on BPW uniformity
205+
{0.8f, 3.5f, 0.12f, 0.0005f, 1.5f, 6.0f, 3.0f, 0.001f, 80, 40000, 8, 10, 0.8f, 3.7f},
206+
207+
// MODE_AGGRESSIVE: Aggressively avoids low BPW, potentially higher error
208+
{1.0f, 3.8f, 0.15f, 0.001f, 1.6f, 6.5f, 4.0f, 0.001f, 100, 50000, 8, 12, 0.9f, 3.9f},
209+
210+
// MODE_3_5_2: Approximates the behavior of Version 3-5-2
211+
{0.1f, 3.0f, 0.05f, 0.0f, 1.2f, 5.0f, 0.1f, 0.0009f, 25, 15000, 3, 5, 0.5f, 3.3f},
212+
213+
// MODE_3_5_6: Replicates the behavior of Version 3-5-6
214+
{0.6f, 3.3f, 0.12f, 0.0005f, 1.5f, 5.5f, 1.8f, 0.002f, 50, 30000, 8, 8, 0.75f, 3.6f},
215+
216+
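// MODE_CUSTOM: placeholder entry that keeps this table index-aligned with the Mode enum; it is never read, because custom mode takes its values from the literal assigned to params below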
217+
{0.6f, 3.2f, 0.1f, 0.0001f, 1.3f, 5.5f, 1.5f, 0.001f, 60, 30000, 8, 8, 0.75f, 3.5f}
218+
};
219+
220+
ModeParams params;
221+
if (mode == MODE_CUSTOM)
222+
{
223+
params = {0.7f, 3.3f, 0.11f, 0.0002f, 1.35f, 5.7f, 2.0f, 0.001f, 70, 35000, 8, 9, 0.8f, 3.6f}; // Example custom parameters, you should change this
224+
} else {
225+
params = mode_params[mode];
226+
}
227+
228+
// --- Parameter Application ---
229+
// (Consolidated parameters are grouped together)
230+
231+
// Penalty-related parameters
232+
const float bpw_penalty_scale = params.bpw_penalty_scale;
233+
const float min_bpw_base = params.min_bpw_base;
234+
const float bpw_balance_factor = params.bpw_balance_factor;
235+
236+
// Redistribution-related parameters
237+
const int redistribution_iterations = params.redistribution_iterations;
238+
const float targeted_redistribution_bpw_threshold = params.targeted_redistribution_bpw_threshold;
239+
const float targeted_redistribution_max_err_increase = params.targeted_redistribution_max_err_increase;
240+
const float high_bpw_donor_threshold = params.high_bpw_donor_threshold;
241+
const int num_options_to_explore_per_layer = params.num_options_to_explore_per_layer;
242+
243+
// Opportunistic optimization parameters
244+
const int opportunistic_iterations = params.opportunistic_iterations;
245+
const float initial_opportunistic_temp = params.opportunistic_temp;
246+
const float low_error_threshold = params.low_error_threshold;
247+
248+
// Other parameters
249+
const float error_floor = params.error_floor;
250+
const int bpw_smoothing_passes = params.bpw_smoothing_passes;
251+
const float bpw_smoothing_threshold = params.bpw_smoothing_threshold;
191252

192253
// --- Dynamic Minimum BPW ---
193254
auto calculate_dynamic_min_bpw = [&](float target_bpw, float temp_ratio) {

0 commit comments

Comments
 (0)