Skip to content

Commit 877c43e

Browse files
committed Mar 20, 2025
Bring over graph/multigpu changes from tokensurgeon experiments
1 parent 5839c12 commit 877c43e

18 files changed

+446
-271
lines changed
 

‎mergekit/architecture/__init__.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@
2222
if TYPE_CHECKING:
2323
from mergekit.config import MergeConfiguration
2424

25-
logger = logging.getLogger(__name__)
25+
LOG = logging.getLogger(__name__)
2626

2727

2828
def arch_info_for_config(config: PretrainedConfig) -> Optional[ModelArchitecture]:
@@ -44,11 +44,11 @@ def arch_info_for_config(config: PretrainedConfig) -> Optional[ModelArchitecture
4444
for c in candidates:
4545
if c.expected_model_type == config.model_type:
4646
return c
47-
logger.warning(
47+
LOG.warning(
4848
f"Multiple architectures for {arch_name}, none match model type {config.model_type}"
4949
)
5050

51-
logger.warning(f"No JSON architecture found for {arch_name}")
51+
LOG.warning(f"No JSON architecture found for {arch_name}")
5252
return None
5353

5454

‎mergekit/architecture/auto.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121

2222
RE_LAYER_INDEX = re.compile(r"\.(\d+)\.")
2323

24-
logger = logging.getLogger(__name__)
24+
LOG = logging.getLogger(__name__)
2525

2626

2727
def get_model_tensor_names(model: ModelReference, options: MergeOptions) -> List[str]:

‎mergekit/evo/actors.py

+9 -9
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@
4141
from mergekit.options import MergeOptions
4242
from mergekit.plan import MergePlanner
4343

44-
logger = logging.getLogger(__name__)
44+
LOG = logging.getLogger(__name__)
4545

4646

4747
class MergeActorBase:
@@ -91,18 +91,18 @@ def evaluate_genotype(
9191
) -> dict:
9292
gc.collect()
9393
torch.cuda.empty_cache()
94-
logger.info("Merging model")
94+
LOG.info("Merging model")
9595
merged_path = merge_model(
9696
genotype, self.genome, self.model_storage_path, self.merge_options
9797
)
9898
if not merged_path:
99-
logger.error("Model merge failed")
99+
LOG.error("Model merge failed")
100100
return {"score": None, "results": None}
101101

102102
model_kwargs = {}
103103
if self.quantization_config is not None:
104104
model_kwargs["quantization_config"] = self.quantization_config
105-
logger.info(f"Model merged to {merged_path}")
105+
LOG.info(f"Model merged to {merged_path}")
106106
return evaluate_model(
107107
merged_path,
108108
self.config.tasks,
@@ -167,7 +167,7 @@ def _maybe_init_model(self, config: MergeConfiguration):
167167
continue
168168

169169
if getattr(cfg_out, key) != getattr(self.arch_info.config, key, None):
170-
logger.warning(f"Config key {key} changed, reinitializing model")
170+
LOG.warning(f"Config key {key} changed, reinitializing model")
171171
different = True
172172
break
173173

@@ -206,7 +206,7 @@ def _maybe_init_model(self, config: MergeConfiguration):
206206
del inner_model
207207
tokenizer_donor = self.genome.definition.base_model
208208
if tokenizer_donor is None:
209-
logger.warning(
209+
LOG.warning(
210210
"Base model not set, using tokenizer from first model in genome"
211211
)
212212
tokenizer_donor = self.genome.definition.models[0]
@@ -224,7 +224,7 @@ def _maybe_init_model(self, config: MergeConfiguration):
224224
max_model_len = min(max_model_len or 1024, window_sz)
225225
if max_model_len and max_model_len > 8192:
226226
max_model_len = 8192
227-
logger.warning(f"Clipping sequence length to {max_model_len}")
227+
LOG.warning(f"Clipping sequence length to {max_model_len}")
228228

229229
mem_util = (
230230
0.7 if self.merge_options.cuda else 0.9
@@ -248,13 +248,13 @@ def _maybe_init_model(self, config: MergeConfiguration):
248248
if ai
249249
else None
250250
)
251-
logger.info("Model initialized")
251+
LOG.info("Model initialized")
252252

253253
def evaluate(self, genotype: torch.Tensor) -> dict:
254254
try:
255255
config = self.genome.genotype_merge_config(genotype)
256256
except InvalidGenotypeError as e:
257-
logger.error("Invalid genotype", exc_info=e)
257+
LOG.error("Invalid genotype", exc_info=e)
258258
return {"score": None, "results": None}
259259

260260
self._maybe_init_model(config)

0 commit comments

Comments
 (0)