
Commit 346e17f

Authored Jan 16, 2025
Merge pull request #224 from macrocosm-os/dev
Release 5.0.0
2 parents: 28b37f6 + d2ecc0d

16 files changed (+1202 -981 lines)

constants/__init__.py (+265 -90)

Large diffs are not rendered by default.

neurons/config.py (+1 -1)
@@ -61,7 +61,7 @@ def validator_config():
     )
     parser.add_argument(
         "--netuid",
-        type=str,
+        type=int,
         default=constants.SUBNET_UID,
         help="The subnet UID.",
     )
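
The only change here is the argument type: --netuid is now parsed as an integer instead of a string, so the value compares cleanly against numeric subnet UIDs (the same fix appears in neurons/miner.py below). A minimal standalone sketch of the difference; the default value is illustrative, not the real constants.SUBNET_UID:

import argparse

parser = argparse.ArgumentParser()
# type=int makes argparse convert the CLI token to a number.
parser.add_argument("--netuid", type=int, default=0, help="The subnet UID.")

args = parser.parse_args(["--netuid", "9"])
# With the old type=str this would have been the string "9", so comparisons
# against an integer netuid could silently fail.
assert args.netuid == 9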

neurons/miner.py (+43 -44)
@@ -16,19 +16,18 @@
 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 # DEALINGS IN THE SOFTWARE.
 
+import argparse
 import asyncio
+import datetime as dt
 import math
 import os
 import random
 import typing
 
-
-import wandb
+import bittensor as bt
 import torch
-
-import argparse
-import constants
-
+import wandb
+from dotenv import load_dotenv
 from taoverse.metagraph import utils as metagraph_utils
 from taoverse.model.storage.chain.chain_model_metadata_store import (
     ChainModelMetadataStore,
@@ -37,15 +36,14 @@
     HuggingFaceModelStore,
 )
 from taoverse.model.storage.model_metadata_store import ModelMetadataStore
+from taoverse.utilities import logging
+from taoverse.utilities import utils as taoverse_utils
 from taoverse.utilities.enum_action import IntEnumAction
-from competitions.data import CompetitionId
-
-import pretrain as pt
-import bittensor as bt
 from transformers import PreTrainedModel
-import datetime as dt
 
-from dotenv import load_dotenv
+import constants
+import pretrain as pt
+from competitions.data import CompetitionId
 
 load_dotenv()  # take environment variables from .env.
 
@@ -132,9 +130,7 @@ def get_config():
     parser.add_argument(
         "--bs", type=int, default=constants.batch_size, help="Batch size"
     )
-    parser.add_argument(
-        "--sl", type=int, default=constants.SEQUENCE_LENGTH_2, help="Sequence length"
-    )
+    parser.add_argument("--sl", type=int, default=4096, help="Sequence length")
     parser.add_argument(
         "--accumulation_steps",
         type=int,
@@ -149,7 +145,7 @@ def get_config():
     )
     parser.add_argument(
         "--netuid",
-        type=str,
+        type=int,
         default=constants.SUBNET_UID,
         help="The subnet UID.",
     )
@@ -196,7 +192,7 @@ async def load_starting_model(
             metagraph=metagraph,
             metadata_store=metadata_store,
         )
-        bt.logging.success(
+        logging.info(
             f"Training with best model from competition: {config.competition_id}. Model={str(model)}"
         )
         return model
@@ -210,33 +206,37 @@ async def load_starting_model(
             metagraph=metagraph,
             metadata_store=metadata_store,
         )
-        bt.logging.success(
+        logging.info(
             f"Training with model from uid: {config.load_uid}. Model={str(model)}"
         )
         return model
 
     # Check if we should load a model from a local directory.
     if config.load_model_dir:
         model = pt.mining.load_local_model(config.load_model_dir, kwargs)
-        bt.logging.success(f"Training with model from disk. Model={str(model)}")
+        logging.info(f"Training with model from disk. Model={str(model)}")
         return model
 
     # Check if we should load a model from a local file.
     if config.load_model:
         model = pt.mining.load_gpt2_model(config.load_model)
-        bt.logging.success(f"Training with model from disk. Model={str(model)}")
+        logging.info(f"Training with model from disk. Model={str(model)}")
         return model
 
     # Start from scratch.
     model = pt.model.get_model()
-    bt.logging.success(f"Training from scratch. Model={str(model)}")
+    logging.info(f"Training from scratch. Model={str(model)}")
 
     return model
 
 
 async def main(config: bt.config):
+    raise NotImplementedError("You must implement your own training logic in miner.py")
+
     # Create bittensor objects.
-    bt.logging(config=config)
+    bt.logging.set_warning()
+    taoverse_utils.logging.reinitialize()
+    taoverse_utils.configure_logging(config)
 
     wallet = bt.wallet(config=config)
     subtensor = bt.subtensor(config=config)
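
Two behavioural changes land in this hunk: main() now raises immediately, which turns the bundled loop into reference code that miners are expected to replace with their own training logic, and console output moves from bt.logging.success(...) to the taoverse logging helpers. A small sketch of the new logging setup, assuming only the imports and calls visible in this diff:

import bittensor as bt
from taoverse.utilities import logging
from taoverse.utilities import utils as taoverse_utils


def setup_logging(config: bt.config) -> None:
    # Quiet bittensor's own logger and hand log formatting to taoverse,
    # mirroring the top of main() above.
    bt.logging.set_warning()
    taoverse_utils.logging.reinitialize()
    taoverse_utils.configure_logging(config)

# After setup, the former bt.logging.success(...) calls become logging.info(...),
# e.g. logging.info(f"Saving model to path: {model_dir}.").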
@@ -250,7 +250,7 @@ async def main(config: bt.config):
     # If running online, make sure the miner is registered, has a hugging face access token, and has provided a repo id.
     my_uid = None
     if not config.offline:
-        my_uid = meta_utils.assert_registered(wallet, metagraph)
+        my_uid = metagraph_utils.assert_registered(wallet, metagraph)
         HuggingFaceModelStore.assert_access_token_exists()
 
     # Create a unique run id for this run.
@@ -261,7 +261,7 @@ async def main(config: bt.config):
     use_wandb = False
     if not config.offline:
         if config.wandb_project is None or config.wandb_entity is None:
-            bt.logging.warning(
+            logging.warning(
                 "Wandb project or entity not specified. This run will not be logged to wandb"
             )
         else:
@@ -273,17 +273,17 @@ async def main(config: bt.config):
 
     if not model_constraints:
         raise RuntimeError(f"No competition found for {config.competition_id}")
-
+
     kwargs = model_constraints.kwargs.copy()
-
+
     # Init model.
     # Init model.
     tokenizer = pt.model.load_tokenizer(model_constraints, cache_dir=config.model_dir)
     model = await load_starting_model(config, metagraph, chain_metadata_store, kwargs)
     model = model.train()
     model = model.to(config.device)
 
-    bt.logging.success(f"Saving model to path: {model_dir}.")
+    logging.info(f"Saving model to path: {model_dir}.")
     pt.mining.save(model, model_dir)
 
     # Build optimizer
@@ -308,7 +308,7 @@ async def main(config: bt.config):
                 "uid": my_uid,
                 "hotkey": wallet.hotkey.ss58_address,
                 "run_name": run_id,
-                "version": constants.__version__,
+                "version": constants.__version__,
                 "type": "miner",
             },
             allow_val_change=True,
@@ -318,7 +318,7 @@ async def main(config: bt.config):
         # This is not seen by validators.
         wandb_run.save(os.path.join(model_dir, "*"), base_path=model_dir, policy="end")
     else:
-        bt.logging.warning(
+        logging.warning(
             "Not posting run to wandb. Either --offline is specified or the wandb settings are missing."
         )
 
@@ -335,7 +335,7 @@ async def main(config: bt.config):
         epoch_loss = 0.0
 
         # Prepare the data loader with random pages for each epoch
-        bt.logging.success(
+        logging.info(
             f"Loading {config.pages_per_epoch} pages for training this epoch"
         )
         random_pages = [
@@ -346,7 +346,7 @@ async def main(config: bt.config):
         # Change this loader if you wish to use a different dataset
         loader = pt.dataset.SubsetFineWebEdu2Loader(
             batch_size=config.bs,
-            sequence_length=config.sl
+            sequence_length=config.sl,
             num_pages=config.pages_per_epoch,
             tokenizer=tokenizer,
         )
@@ -369,7 +369,7 @@ async def main(config: bt.config):
                     n_acc_steps += 1
                     optimizer.step()  # Perform a single optimization step
                     optimizer.zero_grad()  # Clear gradients
-                    bt.logging.success(
+                    logging.info(
                         f"Step: {n_acc_steps} loss: {outputs.loss.detach().item()}"
                     )
                     if use_wandb:
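
This hunk only swaps the logger, but it sits inside the gradient-accumulation loop: the optimizer steps once every --accumulation_steps micro-batches. A self-contained toy sketch of that pattern; the model, optimizer, and data below are stand-ins rather than the subnet's real objects (in miner.py they come from pt.model.get_model(), torch.optim, and the loader above):

import torch

model = torch.nn.Linear(8, 8)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
micro_batches = [torch.randn(4, 8) for _ in range(8)]
accumulation_steps = 4

optimizer.zero_grad()
for i, batch in enumerate(micro_batches):
    # Scale each micro-batch loss so the summed gradient matches one full batch.
    loss = model(batch).pow(2).mean() / accumulation_steps
    loss.backward()
    if (i + 1) % accumulation_steps == 0:
        optimizer.step()       # a single optimization step per accumulated group
        optimizer.zero_grad()  # clear gradients for the next group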
@@ -388,47 +388,46 @@ async def main(config: bt.config):
         avg_loss = epoch_loss / n_batches
 
         # Log the average loss for the epoch
-        bt.logging.success(f"Epoch: {epoch_step} average loss: {avg_loss}")
+        logging.info(f"Epoch: {epoch_step} average loss: {avg_loss}")
         epoch_step += 1
 
         # Check if the average loss of this epoch is the best we've seen so far
         if avg_loss < best_avg_loss:
             best_avg_loss = avg_loss  # Update the best average loss
 
-            bt.logging.success(f"New best average loss: {best_avg_loss}.")
+            logging.info(f"New best average loss: {best_avg_loss}.")
 
             # Save the model to your mining dir.
-            bt.logging.success(f"Saving model to path: {model_dir}.")
+            logging.info(f"Saving model to path: {model_dir}.")
             pt.mining.save(model, model_dir)
 
-    bt.logging.success("Finished training")
+    logging.info("Finished training")
     # Push the model to your run.
     if not config.offline:
         if best_avg_loss < config.avg_loss_upload_threshold:
-            bt.logging.success(
+            logging.info(
                 f"Trained model had a best_avg_loss of {best_avg_loss} which is below the threshold of {config.avg_loss_upload_threshold}. Uploading to hugging face. "
             )
 
             # First, reload the best model from the training run.
             model_to_upload = pt.mining.load_local_model(
                 model_dir, model_constraints.kwargs
             )
-
+
             await pt.mining.push(
                 model_to_upload,
                 config.hf_repo_id,
-                wallet,
+                wallet,
                 config.competition_id,
                 metadata_store=chain_metadata_store,
-                use_hotkey_in_hash=config.use_hotkey_in_hash,
             )
-
+
         else:
-            bt.logging.success(
+            logging.info(
                 f"This training run achieved a best_avg_loss={best_avg_loss}, which did not meet the upload threshold. Not uploading to hugging face."
             )
     else:
-        bt.logging.success(
+        logging.info(
             "Not uploading to hugging face because --offline was specified."
         )
 
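Besides the logging swap, this hunk also drops the use_hotkey_in_hash argument from the pt.mining.push(...) call. The surrounding logic is unchanged: the miner tracks the best epoch-average loss and only uploads when it beats --avg_loss_upload_threshold. A toy, self-contained sketch of that gate; the losses and threshold below are made up, and the real upload happens via pt.mining.push:

import math

best_avg_loss = math.inf
for epoch_losses in ([3.2, 3.0], [2.8, 2.9], [3.1, 3.0]):
    avg_loss = sum(epoch_losses) / len(epoch_losses)
    if avg_loss < best_avg_loss:
        best_avg_loss = avg_loss  # miner.py also saves a checkpoint here

avg_loss_upload_threshold = 3.0  # illustrative value for --avg_loss_upload_threshold
if best_avg_loss < avg_loss_upload_threshold:
    print(f"best_avg_loss={best_avg_loss} beats the threshold; would upload")  # prints 2.85
else:
    print("threshold not met; would skip the upload")
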
@@ -440,7 +439,7 @@ async def main(config: bt.config):
 
 if __name__ == "__main__":
     # Parse and print configuration
-    config = neuron_config.miner_config()
+    config = get_config()
 
     if config.list_competitions:
         print(constants.COMPETITION_SCHEDULE_BY_BLOCK)
