Skip to content

Commit b1f4f7a

Browse files
Fixed pre-commit problems; fixed a small bug in logging_config so it handles the LOG_LEVEL env var
1 parent 83237b8 commit b1f4f7a

File tree

8 files changed

+14
-15
lines changed

8 files changed

+14
-15
lines changed

Diff for: scripts/finetune.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -17,14 +17,14 @@
1717
from optimum.bettertransformer import BetterTransformer
1818
from transformers import GenerationConfig, TextStreamer
1919

20+
from axolotl.logging_config import configure_logging
2021
from axolotl.utils.data import load_prepare_datasets, load_pretraining_dataset
2122
from axolotl.utils.dict import DictDefault
2223
from axolotl.utils.models import load_model, load_tokenizer
2324
from axolotl.utils.tokenization import check_dataset_labels
2425
from axolotl.utils.trainer import setup_trainer
2526
from axolotl.utils.validation import validate_config
2627
from axolotl.utils.wandb import setup_wandb_env_vars
27-
from axolotl.logging_config import configure_logging
2828

2929
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
3030
src_dir = os.path.join(project_root, "src")

Diff for: src/axolotl/datasets.py

+1
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616

1717
LOG = logging.getLogger("axolotl")
1818

19+
1920
class TokenizedPromptDataset(IterableDataset):
2021
"""
2122
Iterable dataset that returns tokenized prompts from a stream of text files.

Diff for: src/axolotl/logging_config.py

+4-1
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,6 @@
1+
"""Logging configuration settings"""
2+
3+
import os
14
import sys
25
from logging.config import dictConfig
36
from typing import Any, Dict
@@ -18,7 +21,7 @@
1821
"stream": sys.stdout,
1922
},
2023
},
21-
"root": {"handlers": ["console"], "level": "INFO"},
24+
"root": {"handlers": ["console"], "level": os.getenv("LOG_LEVEL", "INFO")},
2225
}
2326

2427

Diff for: src/axolotl/monkeypatch/llama_landmark_attn.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@
5252
logging,
5353
replace_return_docstrings,
5454
)
55+
5556
LOG = logging.getLogger("axolotl")
5657

5758
_CONFIG_FOR_DOC = "LlamaConfig"
@@ -861,7 +862,7 @@ def forward(
861862

862863
if self.gradient_checkpointing and self.training:
863864
if use_cache:
864-
logger.warning_once(
865+
LOG.warning_once(
865866
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
866867
)
867868
use_cache = False

Diff for: src/axolotl/prompt_strategies/pygmalion.py

+2
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,8 @@
1111
tokenize_prompt_default,
1212
)
1313

14+
LOG = logging.getLogger("axolotl")
15+
1416
IGNORE_TOKEN_ID = -100
1517

1618

Diff for: src/axolotl/prompters.py

+1
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
from enum import Enum, auto
66
from typing import Generator, List, Optional, Tuple, Union
77

8+
LOG = logging.getLogger("axolotl")
89
IGNORE_TOKEN_ID = -100
910

1011

Diff for: src/axolotl/utils/data.py

+3-9
Original file line number | Diff line number | Diff line change
@@ -258,9 +258,7 @@ def load_tokenized_prepared_datasets(
258258
suffix = ""
259259
if ":load_" in d.type:
260260
suffix = f" Did you mean {d.type.replace(':load_', '.load_')}?"
261-
LOG.error(
262-
f"unhandled prompt tokenization strategy: {d.type}. {suffix}"
263-
)
261+
LOG.error(f"unhandled prompt tokenization strategy: {d.type}. {suffix}")
264262
raise ValueError(
265263
f"unhandled prompt tokenization strategy: {d.type} {suffix}"
266264
)
@@ -271,9 +269,7 @@ def load_tokenized_prepared_datasets(
271269
samples = samples + list(d)
272270
dataset = Dataset.from_list(samples).shuffle(seed=seed)
273271
if cfg.local_rank == 0:
274-
LOG.info(
275-
f"Saving merged prepared dataset to disk... {prepared_ds_path}"
276-
)
272+
LOG.info(f"Saving merged prepared dataset to disk... {prepared_ds_path}")
277273
dataset.save_to_disk(prepared_ds_path)
278274
if cfg.push_dataset_to_hub:
279275
LOG.info(
@@ -366,9 +362,7 @@ def load_prepare_datasets(
366362
[dataset],
367363
seq_length=max_packed_sequence_len,
368364
)
369-
LOG.info(
370-
f"packing master dataset to len: {cfg.max_packed_sequence_len}"
371-
)
365+
LOG.info(f"packing master dataset to len: {cfg.max_packed_sequence_len}")
372366
dataset = Dataset.from_list(list(constant_len_dataset))
373367

374368
# filter out bad data

Diff for: tests/test_prompt_tokenizers.py

-3
Original file line number | Diff line number | Diff line change
@@ -16,9 +16,6 @@
1616
ShareGPTPromptTokenizingStrategy,
1717
)
1818
from axolotl.prompters import AlpacaPrompter, PromptStyle, ShareGPTPrompter
19-
from axolotl.logging_config import configure_logging
20-
21-
configure_logging()
2219

2320
LOG = logging.getLogger("axolotl")
2421

0 commit comments

Comments (0)