Commit: URL Issues
AllianceSoftech committed Dec 26, 2024
1 parent 1431b29 commit c4b6c0f
Showing 8 changed files with 89,604 additions and 784 deletions.
4 changes: 2 additions & 2 deletions part_5/01_main-code/load-finetuned-model.ipynb
@@ -228,7 +228,7 @@
    "provenance": []
   },
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3.12 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -242,7 +242,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.7"
+   "version": "3.12.6"
   }
  },
  "nbformat": 4,
Binary file added part_5/04_user_interface/.app.py.swp
26 changes: 13 additions & 13 deletions part_5/04_user_interface/app.py
@@ -1,5 +1,5 @@
 # Copyright (c) The SkillPedia under Apache License 2.0 (see LICENSE.txt).
-# Source for "Build a Large Language Model From Scratch"
+# Source for "Building a Large Language Model From Scratch"
 #
 # Code: https://github.com/Sangwan70/Building-an-LLM-From-Scratch

@@ -20,36 +20,36 @@
 def get_model_and_tokenizer():
     """
-    Code to load finetuned GPT-2 model generated in part 6.
-    This requires that you run the code in part 6 first, which generates the necessary model.pth file.
+    Code to load finetuned GPT-2 model generated in part 5.
+    This requires that you run the code in part 5 first, which generates the necessary model.pth file.
     """

-    GPT_CONFIG_124M = {
+    GPT_CONFIG_355M = {
         "vocab_size": 50257,     # Vocabulary size
         "context_length": 1024,  # Context length
-        "emb_dim": 768,          # Embedding dimension
-        "n_heads": 12,           # Number of attention heads
-        "n_layers": 12,          # Number of layers
+        "emb_dim": 1024,         # Embedding dimension
+        "n_heads": 16,           # Number of attention heads
+        "n_layers": 24,          # Number of layers
         "drop_rate": 0.1,        # Dropout rate
         "qkv_bias": True         # Query-key-value bias
     }

     tokenizer = tiktoken.get_encoding("gpt2")

-    model_path = Path("..") / "01_main-code" / "review_classifier.pth"
+    model_path = Path("../..") / "gpt2" / "review_classifier.pth"
     if not model_path.exists():
         print(
-            f"Could not find the {model_path} file. Please run the part 6 code"
-            " (part_5.ipynb) to generate the review_classifier.pth file."
+            f"""Could not find the {model_path} file. Please run the part 5 code
+            (part_5.ipynb) to generate the review_classifier.pth file."""
         )
         sys.exit()

     # Instantiate model
-    model = GPTModel(GPT_CONFIG_124M)
+    model = GPTModel(GPT_CONFIG_355M)

-    # Convert model to classifier as in section 6.5 in part_5.ipynb
+    # Convert model to classifier as in section 5.5 in part_5.ipynb
     num_classes = 2
-    model.out_head = torch.nn.Linear(in_features=GPT_CONFIG_124M["emb_dim"], out_features=num_classes)
+    model.out_head = torch.nn.Linear(in_features=GPT_CONFIG_355M["emb_dim"], out_features=num_classes)

     # Then load model weights
     checkpoint = torch.load(model_path, map_location=device, weights_only=True)
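For context, the new hyperparameters (emb_dim 1024, 16 attention heads, 24 layers) are those of the GPT-2 medium architecture, roughly 355M parameters, replacing the GPT-2 small (124M) settings; the renamed GPT_CONFIG_355M dictionary reflects that. The sketch below pulls the loading flow this diff implies into one runnable-style outline. It assumes the GPTModel class defined earlier in the repo and assumes review_classifier.pth stores a plain state_dict, so treat it as an illustration rather than the exact contents of app.py.

    # Illustrative sketch only (assumes GPTModel from the repo's earlier
    # parts and a checkpoint saved as a plain state_dict).
    from pathlib import Path

    import tiktoken
    import torch

    GPT_CONFIG_355M = {
        "vocab_size": 50257,     # Vocabulary size
        "context_length": 1024,  # Context length
        "emb_dim": 1024,         # Embedding dimension
        "n_heads": 16,           # Number of attention heads
        "n_layers": 24,          # Number of layers
        "drop_rate": 0.1,        # Dropout rate
        "qkv_bias": True,        # Query-key-value bias
    }

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer = tiktoken.get_encoding("gpt2")

    model = GPTModel(GPT_CONFIG_355M)  # class defined earlier in the repo

    # Replace the language-modeling head with a 2-class output head *before*
    # loading weights, so the checkpoint's keys and shapes match.
    model.out_head = torch.nn.Linear(
        in_features=GPT_CONFIG_355M["emb_dim"], out_features=2
    )

    model_path = Path("../..") / "gpt2" / "review_classifier.pth"
    checkpoint = torch.load(model_path, map_location=device, weights_only=True)
    model.load_state_dict(checkpoint)  # assumption: file holds a raw state_dict
    model.to(device)
    model.eval()

Swapping out_head before load_state_dict matters because the checkpoint was saved from the classifier variant: the two-class head must already exist for the parameter names and shapes to line up.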
15,000 changes: 15,000 additions & 0 deletions part_6/01_main-code/english_train.json

(Large diff not rendered.)

6 changes: 3 additions & 3 deletions part_6/01_main-code/gpt_instruction_finetuning.py
@@ -1,5 +1,5 @@
 # Copyright (c) The SkillPedia under Apache License 2.0 (see LICENSE.txt).
-# Source for "Build a Large Language Model From Scratch"
+# Source for "Building a Large Language Model From Scratch"
 #
 # Code: https://github.com/Sangwan70/Building-an-LLM-From-Scratch
 #
@@ -166,8 +166,8 @@ def main(test_mode=False):
     #######################################
     # Download and prepare dataset
     #######################################
-    file_path = "instruction-data.json"
-    url = "https://raw.githubusercontent.com/Sangwan70/Building-an-LLM-From-Scratch/refs/heads/main/part_6/01_main-code/instruction-data.json"
+    file_path = "instruction-data-with-response.json"
+    url = "https://raw.githubusercontent.com/Sangwan70/Building-an-LLM-From-Scratch/refs/heads/main/part_6/01_main-code/instruction-data-with-response.json"
     data = download_and_load_file(file_path, url)

     train_portion = int(len(data) * 0.85)  # 85% for training
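The URL swap above is consumed by download_and_load_file, which fetches the JSON dataset once and caches it locally. Below is a minimal sketch of such a helper, consistent with how it is called in the hunk; the repo's actual implementation may differ in details such as encoding and error handling.

    import json
    import os
    import urllib.request

    def download_and_load_file(file_path, url):
        """Download the JSON dataset if absent, then load it from disk."""
        if not os.path.exists(file_path):
            with urllib.request.urlopen(url) as response:
                text_data = response.read().decode("utf-8")
            with open(file_path, "w", encoding="utf-8") as file:
                file.write(text_data)
        with open(file_path, "r", encoding="utf-8") as file:
            return json.load(file)

With train_portion at 85%, the remaining 15% of data is presumably divided between validation and test splits further down in main(); the exact ratio is not visible in this diff.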
3,120 changes: 2,460 additions & 660 deletions part_6/01_main-code/instruction-data-with-response.json

(Large diff not rendered.)

(Diffs for the remaining changed files did not load.)

0 comments on commit c4b6c0f
