Skip to content

Commit e5e18f9

Browse files
committed
feat(core): add GGUF splitting feature
- add ability to split GGUFs
1 parent cee4294 commit e5e18f9

File tree

2 files changed

+126
-0
lines changed

2 files changed

+126
-0
lines changed

src/AutoGGUF.py

+117
Original file line numberDiff line numberDiff line change
@@ -211,12 +211,75 @@ def __init__(self, args: List[str]) -> None:
211211
self.fp8_layout.addWidget(quantize_button)
212212
self.fp8_dialog.setLayout(self.fp8_layout)
213213

214+
# Split GGUF Window
215+
self.split_gguf_dialog = QDialog(self)
216+
self.split_gguf_dialog.setWindowTitle(SPLIT_GGUF)
217+
self.split_gguf_dialog.setFixedWidth(500)
218+
self.split_gguf_layout = QVBoxLayout()
219+
220+
# Input path
221+
input_layout = QHBoxLayout()
222+
self.split_gguf_input = QLineEdit()
223+
input_button = QPushButton(BROWSE)
224+
input_button.clicked.connect(
225+
lambda: self.split_gguf_input.setText(
226+
QFileDialog.getExistingDirectory(self, OPEN_MODEL_FOLDER)
227+
)
228+
)
229+
input_layout.addWidget(QLabel(INPUT_MODEL))
230+
input_layout.addWidget(self.split_gguf_input)
231+
input_layout.addWidget(input_button)
232+
self.split_gguf_layout.addLayout(input_layout)
233+
234+
# Output path
235+
output_layout = QHBoxLayout()
236+
self.split_gguf_output = QLineEdit()
237+
output_button = QPushButton(BROWSE)
238+
output_button.clicked.connect(
239+
lambda: self.split_gguf_output.setText(
240+
QFileDialog.getExistingDirectory(self, OPEN_MODEL_FOLDER)
241+
)
242+
)
243+
output_layout.addWidget(QLabel(OUTPUT))
244+
output_layout.addWidget(self.split_gguf_output)
245+
output_layout.addWidget(output_button)
246+
self.split_gguf_layout.addLayout(output_layout)
247+
248+
# Split options
249+
split_options_layout = QHBoxLayout()
250+
self.split_max_size = QLineEdit()
251+
self.split_max_size.setPlaceholderText("Size in G/M")
252+
self.split_max_tensors = QLineEdit()
253+
self.split_max_tensors.setPlaceholderText("Number of tensors")
254+
split_options_layout.addWidget(QLabel(SPLIT_MAX_SIZE))
255+
split_options_layout.addWidget(self.split_max_size)
256+
split_options_layout.addWidget(QLabel(SPLIT_MAX_TENSORS))
257+
split_options_layout.addWidget(self.split_max_tensors)
258+
self.split_gguf_layout.addLayout(split_options_layout)
259+
260+
# Split button
261+
split_button = QPushButton(SPLIT_GGUF)
262+
split_button.clicked.connect(
263+
lambda: self.split_gguf(
264+
self.split_gguf_input.text(),
265+
self.split_gguf_output.text(),
266+
self.split_max_size.text(),
267+
self.split_max_tensors.text(),
268+
)
269+
)
270+
self.split_gguf_layout.addWidget(split_button)
271+
self.split_gguf_dialog.setLayout(self.split_gguf_layout)
272+
214273
# Tools menu
215274
tools_menu = self.menubar.addMenu("&Tools")
216275
autofp8_action = QAction("&AutoFP8", self)
217276
autofp8_action.setShortcut(QKeySequence("Shift+Q"))
218277
autofp8_action.triggered.connect(self.fp8_dialog.exec)
278+
split_gguf_action = QAction("&Split GGUF", self)
279+
split_gguf_action.setShortcut(QKeySequence("Shift+G"))
280+
split_gguf_action.triggered.connect(self.split_gguf_dialog.exec)
219281
tools_menu.addAction(autofp8_action)
282+
tools_menu.addAction(split_gguf_action)
220283

221284
# Content widget
222285
content_widget = QWidget()
@@ -1246,6 +1309,60 @@ def download_error(self, error_message) -> None:
12461309
if os.path.exists(partial_file):
12471310
os.remove(partial_file)
12481311

1312+
def split_gguf(
    self, model_dir: str, output_dir: str, max_size: str, max_tensors: str
) -> None:
    """Launch a background ``llama-gguf-split`` task and add it to the task list.

    Args:
        model_dir: Input path, passed to ``llama-gguf-split`` as its first
            positional argument.
        output_dir: Output path, passed as the second positional argument.
        max_size: Value for ``--split-max-size``; the flag is omitted when
            the field is empty or whitespace-only.
        max_tensors: Value for ``--split-max-tensors``; the flag is omitted
            when the field is empty or whitespace-only.

    Nothing is raised to the caller: validation and start-up failures are
    reported through ``show_error``.
    """
    if not model_dir or not output_dir:
        show_error(self.logger, f"{SPLIT_GGUF_ERROR}: {NO_MODEL_SELECTED}")
        return

    self.logger.info(SPLIT_GGUF_TASK_STARTED)
    try:
        command = ["llama-gguf-split"]

        # Optional limits come straight from free-text fields; strip them so
        # a field containing only spaces is not forwarded as an argument.
        max_size = max_size.strip()
        max_tensors = max_tensors.strip()
        if max_size:
            command.extend(["--split-max-size", max_size])
        if max_tensors:
            command.extend(["--split-max-tensors", max_tensors])

        command.extend([model_dir, output_dir])

        logs_path = self.logs_input.text()
        ensure_directory(logs_path)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_file = os.path.join(logs_path, f"gguf_split_{timestamp}.log")

        # NOTE(review): presumably runs `command` with the current working
        # directory and writes output to `log_file` -- confirm against
        # QuantizationThread.
        thread = QuantizationThread(command, os.getcwd(), log_file)
        self.quant_threads.append(thread)

        # Build the label from SPLIT_GGUF: the split-related localization
        # strings define no SPLIT_GGUF_DYNAMIC template, so referencing it
        # raised NameError here and the task never started. normpath keeps
        # the basename non-empty when the path ends with a separator.
        model_name = os.path.basename(os.path.normpath(model_dir))
        task_name = f"{SPLIT_GGUF}: {model_name}"
        task_item = TaskListItem(
            task_name,
            log_file,
            show_progress_bar=False,
            logger=self.logger,
            quant_threads=self.quant_threads,
        )
        list_item = QListWidgetItem(self.task_list)
        list_item.setSizeHint(task_item.sizeHint())
        self.task_list.addItem(list_item)
        self.task_list.setItemWidget(list_item, task_item)

        thread.status_signal.connect(task_item.update_status)
        thread.finished_signal.connect(
            lambda: self.task_finished(thread, task_item)
        )
        thread.error_signal.connect(
            lambda err: handle_error(self.logger, err, task_item)
        )
        thread.start()

    except Exception as e:
        # SPLIT_GGUF_ERROR has no "{}" placeholder, so .format(e) discarded
        # the exception text; append it explicitly instead.
        show_error(self.logger, f"{SPLIT_GGUF_ERROR}: {e}")

    # Logged as soon as dispatch returns (including after a reported error),
    # not when the split process itself exits.
    self.logger.info(SPLIT_GGUF_TASK_FINISHED)
1365+
12491366
def verify_gguf(self, file_path) -> bool:
12501367
try:
12511368
with open(file_path, "rb") as f:

src/Localizations.py

+9
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,15 @@ def __init__(self):
396396
"HuggingFace to GGUF conversion task started"
397397
)
398398

399+
# Split GGUF
400+
self.SPLIT_GGUF = "Split GGUF"
401+
self.SPLIT_MAX_SIZE = "Split Max Size"
402+
self.SPLIT_MAX_TENSORS = "Split Max Tensors"
403+
self.SPLIT_GGUF_TASK_STARTED = "GGUF Split task started"
404+
self.SPLIT_GGUF_TASK_FINISHED = "GGUF Split task finished"
405+
self.SPLIT_GGUF_COMMAND = "GGUF Split Command"
406+
self.SPLIT_GGUF_ERROR = "Error starting GGUF split"
407+
399408

400409
class _French(_Localization):
401410
def __init__(self):

0 commit comments

Comments
 (0)