Skip to content

Commit baaa786

Browse files
committed
update modes for v3-5-f
1 parent 165c909 commit baaa786

File tree

1 file changed

+9
-12
lines changed

1 file changed

+9
-12
lines changed

exllamav2/exllamav2_ext/ext_quant.cpp

+9-12
Original file line numberDiff line numberDiff line change
@@ -174,9 +174,9 @@ std::tuple<std::vector<std::tuple<uint64_t, float>>, std::vector<int>, float, ui
174174
)
175175
{
176176
// --- Mode-Specific Parameters ---
177-
enum Mode { MODE_BALANCED, MODE_UNIFORM, MODE_AGGRESSIVE, MODE_3_5_2, MODE_3_5_6, MODE_CUSTOM };
177+
// Selectable parameter presets for the optimizer.
// NOTE(review): this enum declares 7 enumerators, while the mode_params table
// defined further down appears to hold only 5 entries after this change (no
// rows for MODE_3_5_2 / MODE_3_5_6). If the table is indexed by the enum value,
// MODE_3_5_2 would pick up the custom row and MODE_3_5_6 / MODE_CUSTOM would
// index past the end - confirm against the lookup code.
enum Mode { MODE_RELAXED, MODE_BALANCED, MODE_UNIFORM, MODE_AGGRESSIVE, MODE_3_5_2, MODE_3_5_6, MODE_CUSTOM };
178178
// --- Mode Selection ---
179-
Mode mode = MODE_3_5_2; // Default mode, Can be changed into other mode or MODE_CUSTOM
179+
Mode mode = MODE_UNIFORM; // Default mode; can be changed to any other preset or to MODE_CUSTOM
180180

181181
// Define a struct to hold parameters for different modes
182182
struct ModeParams {
@@ -198,23 +198,20 @@ std::tuple<std::vector<std::tuple<uint64_t, float>>, std::vector<int>, float, ui
198198

199199
// Define the parameter sets for each mode
200200
const std::vector<ModeParams> mode_params = {
201+
// MODE_RELAXED: Minimize error first
202+
{0.1f, 3.0f, 0.05f, 0.0f, 1.2f, 5.0f, 0.1f, 0.0009f, 25, 15000, 3, 5, 0.5f, 3.3f},
203+
201204
// MODE_BALANCED: Balanced trade-off between BPW uniformity and error
202-
{0.6f, 3.2f, 0.1f, 0.0001f, 1.3f, 5.5f, 1.5f, 0.001f, 60, 30000, 8, 8, 0.75f, 3.5f},
205+
{0.6f, 3.3f, 0.12f, 0.0005f, 1.5f, 5.5f, 1.8f, 0.002f, 50, 30000, 8, 8, 0.75f, 3.6f},
203206

204207
// MODE_UNIFORM: Strong emphasis on BPW uniformity
205-
{0.8f, 3.5f, 0.12f, 0.0005f, 1.5f, 6.0f, 3.0f, 0.001f, 80, 40000, 8, 10, 0.8f, 3.7f},
208+
{0.8f, 3.5f, 0.12f, 0.0005f, 1.6f, 6.0f, 3.0f, 0.001f, 80, 40000, 8, 10, 0.8f, 3.7f},
206209

207210
// MODE_AGGRESSIVE: Aggressively avoids low BPW, potentially higher error
208-
{1.0f, 3.8f, 0.15f, 0.001f, 1.6f, 6.5f, 4.0f, 0.001f, 100, 50000, 8, 12, 0.9f, 3.9f},
209-
210-
// MODE_3_5_2: Approximates the behavior of Version 3-5-2
211-
{0.1f, 3.0f, 0.05f, 0.0f, 1.2f, 5.0f, 0.1f, 0.0009f, 25, 15000, 3, 5, 0.5f, 3.3f},
212-
213-
// MODE_3_5_6: Replicates the behavior of Version 3-5-6
214-
{0.6f, 3.3f, 0.12f, 0.0005f, 1.5f, 5.5f, 1.8f, 0.002f, 50, 30000, 8, 8, 0.75f, 3.6f},
211+
{1.0f, 3.8f, 0.15f, 0.001f, 1.7f, 6.5f, 4.0f, 0.001f, 100, 50000, 8, 12, 0.9f, 3.9f},
215212

216213
// MODE_CUSTOM: User-defined parameters, will be overwritten if using custom mode
217-
{0.6f, 3.2f, 0.1f, 0.0001f, 1.3f, 5.5f, 1.5f, 0.001f, 60, 30000, 8, 8, 0.75f, 3.5f}
214+
{0.8f, 5.0f, 0.12f, 0.0005f, 1.5f, 6.0f, 3.0f, 0.001f, 80, 40000, 8, 10, 0.8f, 5.5f},
218215
};
219216

220217
ModeParams params;

0 commit comments

Comments
 (0)