Commit bb03596

committed Mar 18, 2025
Allow multiple -v for all scripts and standardize logger->LOG
1 parent: b961335

12 files changed (+79 −68 lines)
 

mergekit/architecture/__init__.py (+3 −3)

@@ -22,7 +22,7 @@
 if TYPE_CHECKING:
     from mergekit.config import MergeConfiguration

-logger = logging.getLogger(__name__)
+LOG = logging.getLogger(__name__)


 def arch_info_for_config(config: PretrainedConfig) -> Optional[ModelArchitecture]:
@@ -44,11 +44,11 @@ def arch_info_for_config(config: PretrainedConfig) -> Optional[ModelArchitecture
         for c in candidates:
             if c.expected_model_type == config.model_type:
                 return c
-        logger.warning(
+        LOG.warning(
             f"Multiple architectures for {arch_name}, none match model type {config.model_type}"
         )

-    logger.warning(f"No JSON architecture found for {arch_name}")
+    LOG.warning(f"No JSON architecture found for {arch_name}")
     return None
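
Most of the files below repeat this same mechanical logger -> LOG rename. For reference, a minimal sketch of the module-level logging convention being standardized here (the do_work function is illustrative only, not mergekit code):

    import logging

    # One logger per module, named after the module itself; this commit
    # standardizes the variable name to LOG across mergekit.
    LOG = logging.getLogger(__name__)


    def do_work() -> None:
        # Emitted only when the root logger is configured at INFO or lower.
        LOG.info("doing work")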

mergekit/architecture/auto.py (+1 −1)

@@ -21,7 +21,7 @@

 RE_LAYER_INDEX = re.compile(r"\.(\d+)\.")

-logger = logging.getLogger(__name__)
+LOG = logging.getLogger(__name__)


 def get_model_tensor_names(model: ModelReference, options: MergeOptions) -> List[str]:

mergekit/evo/actors.py (+9 −9)

@@ -41,7 +41,7 @@
 from mergekit.options import MergeOptions
 from mergekit.plan import MergePlanner

-logger = logging.getLogger(__name__)
+LOG = logging.getLogger(__name__)


 class MergeActorBase:
@@ -91,18 +91,18 @@ def evaluate_genotype(
 ) -> dict:
     gc.collect()
     torch.cuda.empty_cache()
-    logger.info("Merging model")
+    LOG.info("Merging model")
     merged_path = merge_model(
         genotype, self.genome, self.model_storage_path, self.merge_options
     )
     if not merged_path:
-        logger.error("Model merge failed")
+        LOG.error("Model merge failed")
         return {"score": None, "results": None}

     model_kwargs = {}
     if self.quantization_config is not None:
         model_kwargs["quantization_config"] = self.quantization_config
-    logger.info(f"Model merged to {merged_path}")
+    LOG.info(f"Model merged to {merged_path}")
     return evaluate_model(
         merged_path,
         self.config.tasks,
@@ -167,7 +167,7 @@ def _maybe_init_model(self, config: MergeConfiguration):
                 continue

             if getattr(cfg_out, key) != getattr(self.arch_info.config, key, None):
-                logger.warning(f"Config key {key} changed, reinitializing model")
+                LOG.warning(f"Config key {key} changed, reinitializing model")
                 different = True
                 break

@@ -206,7 +206,7 @@ def _maybe_init_model(self, config: MergeConfiguration):
         del inner_model
         tokenizer_donor = self.genome.definition.base_model
         if tokenizer_donor is None:
-            logger.warning(
+            LOG.warning(
                 "Base model not set, using tokenizer from first model in genome"
             )
             tokenizer_donor = self.genome.definition.models[0]
@@ -224,7 +224,7 @@ def _maybe_init_model(self, config: MergeConfiguration):
         max_model_len = min(max_model_len or 1024, window_sz)
         if max_model_len and max_model_len > 8192:
             max_model_len = 8192
-            logger.warning(f"Clipping sequence length to {max_model_len}")
+            LOG.warning(f"Clipping sequence length to {max_model_len}")

         mem_util = (
             0.7 if self.merge_options.cuda else 0.9
@@ -248,13 +248,13 @@ def _maybe_init_model(self, config: MergeConfiguration):
             if ai
             else None
         )
-        logger.info("Model initialized")
+        LOG.info("Model initialized")

     def evaluate(self, genotype: torch.Tensor) -> dict:
         try:
             config = self.genome.genotype_merge_config(genotype)
         except InvalidGenotypeError as e:
-            logger.error("Invalid genotype", exc_info=e)
+            LOG.error("Invalid genotype", exc_info=e)
             return {"score": None, "results": None}

         self._maybe_init_model(config)

mergekit/io/tensor_writer.py (+4 −4)

@@ -10,7 +10,7 @@
 import safetensors
 import torch

-logger = logging.getLogger(__name__)
+LOG = logging.getLogger(__name__)


 class TensorWriter:
@@ -65,7 +65,7 @@ def _flush_current_shard(self):
         if not self.current_shard:
             return

-        logger.info(f"Writing shard #{self.shards_written+1} to disk")
+        LOG.info(f"Writing shard #{self.shards_written+1} to disk")

         prefix, extension = self._get_name_components()
         shard_name = f"{prefix}-{self.shards_written+1}.{extension}"
@@ -87,7 +87,7 @@ def finalize(self):
         with self.lock:
             self._flush_current_shard()

-            logger.info("Finalizing shard names")
+            LOG.info("Finalizing shard names")

             prefix, extension = self._get_name_components()

@@ -154,7 +154,7 @@ def _do_save():
                 and isinstance(e.args[0], str)
                 and "share memory" in e.args[0]
             ):
-                logger.warning(
+                LOG.warning(
                     "Your model has duplicated tensors but the --clone-tensors "
                     "flag is not set."
                 )

mergekit/merge.py (+13 −13)

@@ -24,7 +24,7 @@
 from mergekit.plan import MergePlanner
 from mergekit.tokenizer import TokenizerInfo

-logger = logging.getLogger(__name__)
+LOG = logging.getLogger(__name__)


 def run_merge(
@@ -61,7 +61,7 @@ def run_merge(
         loader_cache.get(model)
     del pbar

-    logger.info("Planning operations")
+    LOG.info("Planning operations")
     targets = MergePlanner(
         merge_config,
         arch_info,
@@ -94,7 +94,7 @@ def run_merge(
         cfg_out, arch_info, tokenizer, pad_to_multiple_of=pad_to_multiple_of
     )

-    logger.info("Saving config")
+    LOG.info("Saving config")
     cfg_out.save_pretrained(out_path)

     if options.write_model_card:
@@ -115,7 +115,7 @@ def run_merge(
             fp.write(config_source)

     if tokenizer is not None:
-        logger.info("Saving tokenizer")
+        LOG.info("Saving tokenizer")
         _set_chat_template(tokenizer, merge_config)
         tokenizer.save_pretrained(out_path, safe_serialization=True)
     else:
@@ -125,12 +125,12 @@ def run_merge(
                 merge_config, out_path, trust_remote_code=options.trust_remote_code
             )
         except Exception as e:
-            logger.error(
+            LOG.error(
                 "Failed to copy tokenizer. The merge was still successful, just copy it from somewhere else.",
                 exc_info=e,
             )
     elif merge_config.chat_template:
-        logger.warning(
+        LOG.warning(
             "Chat template specified but no tokenizer found. Chat template will not be saved."
         )

@@ -180,13 +180,13 @@ def _set_chat_template(
             if template:
                 model_templates.append(template.strip())
         except Exception as e:
-            logger.warning(f"Unable to load tokenizer for {model}", exc_info=e)
+            LOG.warning(f"Unable to load tokenizer for {model}", exc_info=e)

         if not model_templates:
             return

         chat_template = Counter(model_templates).most_common(1)[0][0]
-        logger.info(f"Auto-selected chat template: {chat_template}")
+        LOG.info(f"Auto-selected chat template: {chat_template}")

     elif importlib.resources.is_resource(chat_templates, chat_template + ".jinja"):
         with importlib.resources.open_text(
@@ -210,7 +210,7 @@ def _copy_tagalong_files(

     for file_name in files:
         if os.path.exists(os.path.join(donor_model.model.path, file_name)):
-            logger.info(f"Copying {file_name} from {donor_model}")
+            LOG.info(f"Copying {file_name} from {donor_model}")
            shutil.copy(
                 os.path.join(donor_model.model.path, file_name),
                 os.path.join(out_path, file_name),
@@ -234,7 +234,7 @@ def _copy_tokenizer(
         or os.path.exists(os.path.join(donor_model.model.path, "tokenizer.model"))
         )
     ):
-        logger.info(f"Copying tokenizer from {donor_model}")
+        LOG.info(f"Copying tokenizer from {donor_model}")

         for file_name in [
             "tokenizer_config.json",
@@ -253,7 +253,7 @@ def _copy_tokenizer(
         return

     # fallback: try actually loading the tokenizer and saving it
-    logger.info(f"Reserializing tokenizer from {donor_model}")
+    LOG.info(f"Reserializing tokenizer from {donor_model}")
     tokenizer = transformers.AutoTokenizer.from_pretrained(
         donor_model.model.path,
         revision=donor_model.model.revision,
@@ -299,7 +299,7 @@ def _model_out_config(
             cfg_key = module_info.architecture.num_layers_config_key()
             set_config_value(res, cfg_key, module_layers[module_name])
         except Exception as e:
-            logger.warning(
+            LOG.warning(
                 f"Unable to set number of layers for module {module_name} in output config "
                 "- you may need to manually correct it.",
                 exc_info=e,
@@ -322,7 +322,7 @@ def _update_config_vocab(
             config, arch_info.vocab_size_config_key or "vocab_size", vocab_size
         )
     except Exception as e:
-        logger.warning(
+        LOG.warning(
             "Unable to set vocabulary size in output config - you may need to manually correct it.",
             exc_info=e,
         )

mergekit/multigpu_executor.py (+5 −5)

@@ -29,7 +29,7 @@
     build_schedule,
 )

-logger = logging.getLogger(__name__)
+LOG = logging.getLogger(__name__)


 class MultiGPUExecutor:
@@ -61,7 +61,7 @@ def __init__(

         if num_gpus is None:
             num_gpus = torch.cuda.device_count()
-        logger.info(f"Using {num_gpus} GPUs for parallel execution")
+        LOG.info(f"Using {num_gpus} GPUs for parallel execution")

         self.universe = TaskUniverse(tasks)
         self.targets = set([self.universe.get_handle(t) for t in tasks])
@@ -82,7 +82,7 @@ def __init__(
             for t in ordered_handles
             if (t not in trailing_tasks and t not in leading_tasks)
         ]
-        logger.info(
+        LOG.info(
             f"Task breakdown: {len(self.leading_main_handles)} leading, "
             f"{len(parallel_handles)} parallel, "
             f"{len(self.trailing_main_handles)} trailing"
@@ -244,7 +244,7 @@ def _assign_islands_to_gpus(
         island_graph.add_nodes_from([t._index for t in tasks])
         island_graph.add_edges_from(edge_list)
         islands: List[Set[int]] = list(nx.weakly_connected_components(island_graph))
-        logger.info(f"Found {len(islands)} islands in parallel task graph")
+        LOG.info(f"Found {len(islands)} islands in parallel task graph")
         assignments: Dict[torch.device, List[int]] = {}
         for island in islands:
             if not island:
@@ -295,4 +295,4 @@ def _device_worker(
                 result = None
             self.task_completion_queue.put((task_handle._index, result))
         torch.cuda.synchronize(device=device)
-        logger.debug(f"Device {device} done")
+        LOG.debug(f"Device {device} done")

mergekit/options.py (+16 −7)

@@ -30,15 +30,23 @@ class MergeOptions(BaseModel, frozen=True):
     lazy_unpickle: bool = False
     write_model_card: bool = True
     safe_serialization: bool = True
-    verbose: bool = False
+    verbosity: int = 0
     quiet: bool = False
     read_to_gpu: bool = False
     multi_gpu: bool = False
     num_threads: Optional[int] = None
     gpu_rich: bool = False

     def apply_global_options(self):
-        logging.basicConfig(level=logging.INFO if self.verbose else logging.WARNING)
+        if self.verbosity > 1:
+            log_level = logging.DEBUG
+        elif self.verbosity == 1:
+            log_level = logging.INFO
+        else:
+            log_level = logging.WARNING
+        logging.basicConfig(level=log_level)
+        if self.verbosity > 5:
+            logging.debug("whoah buddy that's a lot of verbosity, two is plenty")
         if self.random_seed is not None:
             transformers.trainer_utils.set_seed(self.random_seed)
         if self.num_threads is not None:
@@ -74,7 +82,7 @@ def handle_gpu_rich(cls, value):
     "read_to_gpu": "Read model weights directly to GPU",
     "multi_gpu": "Use multi-gpu parallel graph execution engine",
     "num_threads": "Number of threads to use for parallel CPU operations",
-    "verbose": "Enable verbose logging",
+    "verbosity": "Verbose logging (repeat for more verbosity)",
     "gpu_rich": "Alias for --cuda --low-cpu-memory --read-to-gpu --multi-gpu",
 }

@@ -96,7 +104,7 @@ def handle_gpu_rich(cls, value):
     "trust_remote_code": "Dangerous Options",
     "allow_crimes": "Dangerous Options",
     "random_seed": "Miscellaneous",
-    "verbose": "Miscellaneous",
+    "verbosity": "Miscellaneous",
     "quiet": "Miscellaneous",
     "lora_merge_dtype": "Miscellaneous",
 }
@@ -141,8 +149,9 @@ def wrapper(*args, **kwargs):
             arg_str = f"--{arg_name}"
             param_decls = [arg_str]
             kwargs = {}
-            if field_name == "verbose":
-                param_decls = ["--verbose/--no-verbose", "-v"]
+            if field_name == "verbosity":
+                param_decls = ["-v", "verbosity"]
+                kwargs["count"] = True
             if field_name == "num_threads":
                 param_decls = ["--num-threads", "-j"]
             if field_name == "gpu_rich":
@@ -155,7 +164,7 @@ def wrapper(*args, **kwargs):
                 type=field_type,
                 default=info.default,
                 help=help_str,
-                show_default=field_name != "out_shard_size",
+                show_default=field_name not in ("out_shard_size", "verbosity"),
                 **kwargs,
             )(wrapper)
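
The kwargs["count"] = True line above is the heart of the new behavior: click counts each repetition of -v and passes the total as an integer to MergeOptions.verbosity, which apply_global_options then maps to a log level. A minimal standalone sketch of the same pattern, assuming only the click package is installed (the demo command is hypothetical, not a mergekit entry point):

    import logging

    import click

    LOG = logging.getLogger(__name__)


    @click.command()
    @click.option("-v", "verbosity", count=True, help="Verbose logging (repeat for more verbosity)")
    def demo(verbosity: int) -> None:
        # Same mapping as MergeOptions.apply_global_options:
        # no flag -> WARNING, -v -> INFO, -vv or more -> DEBUG.
        if verbosity > 1:
            log_level = logging.DEBUG
        elif verbosity == 1:
            log_level = logging.INFO
        else:
            log_level = logging.WARNING
        logging.basicConfig(level=log_level)
        LOG.debug("visible with -vv")
        LOG.info("visible with -v")
        LOG.warning("always visible")


    if __name__ == "__main__":
        demo()

Invoked as python demo.py, only the warning prints; adding -v surfaces the info line, and -vv surfaces all three.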
