Commit: URL Issues
AllianceSoftech committed Dec 26, 2024
1 parent 1431b29 commit c4b6c0f
Showing 8 changed files with 89,604 additions and 784 deletions.
4 changes: 2 additions & 2 deletions part_5/01_main-code/load-finetuned-model.ipynb
@@ -228,7 +228,7 @@
    "provenance": []
   },
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3.12 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -242,7 +242,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.7"
+   "version": "3.12.6"
   }
  },
  "nbformat": 4,
Binary file added part_5/04_user_interface/.app.py.swp
26 changes: 13 additions & 13 deletions part_5/04_user_interface/app.py
@@ -1,5 +1,5 @@
 # Copyright (c) The SkillPedia under Apache License 2.0 (see LICENSE.txt).
-# Source for "Build a Large Language Model From Scratch"
+# Source for "Building a Large Language Model From Scratch"
 #
 # Code: https://github.com/Sangwan70/Building-an-LLM-From-Scratch

@@ -20,36 +20,36 @@
 def get_model_and_tokenizer():
     """
-    Code to load finetuned GPT-2 model generated in part 6.
-    This requires that you run the code in part 6 first, which generates the necessary model.pth file.
+    Code to load finetuned GPT-2 model generated in part 5.
+    This requires that you run the code in part 5 first, which generates the necessary model.pth file.
     """

-    GPT_CONFIG_124M = {
+    GPT_CONFIG_355M = {
         "vocab_size": 50257,     # Vocabulary size
         "context_length": 1024,  # Context length
-        "emb_dim": 768,          # Embedding dimension
-        "n_heads": 12,           # Number of attention heads
-        "n_layers": 12,          # Number of layers
+        "emb_dim": 1024,         # Embedding dimension
+        "n_heads": 16,           # Number of attention heads
+        "n_layers": 24,          # Number of layers
         "drop_rate": 0.1,        # Dropout rate
         "qkv_bias": True         # Query-key-value bias
     }

     tokenizer = tiktoken.get_encoding("gpt2")

-    model_path = Path("..") / "01_main-code" / "review_classifier.pth"
+    model_path = Path("../..") / "gpt2" / "review_classifier.pth"
     if not model_path.exists():
         print(
-            f"Could not find the {model_path} file. Please run the part 6 code"
-            " (part_5.ipynb) to generate the review_classifier.pth file."
+            f"""Could not find the {model_path} file. Please run the part 5 code
+            (part_5.ipynb) to generate the review_classifier.pth file."""
         )
         sys.exit()

     # Instantiate model
-    model = GPTModel(GPT_CONFIG_124M)
+    model = GPTModel(GPT_CONFIG_355M)

-    # Convert model to classifier as in section 6.5 in part_5.ipynb
+    # Convert model to classifier as in section 5.5 in part_5.ipynb
     num_classes = 2
-    model.out_head = torch.nn.Linear(in_features=GPT_CONFIG_124M["emb_dim"], out_features=num_classes)
+    model.out_head = torch.nn.Linear(in_features=GPT_CONFIG_355M["emb_dim"], out_features=num_classes)

     # Then load model weights
     checkpoint = torch.load(model_path, map_location=device, weights_only=True)
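For context, the new hyperparameters (emb_dim 1024, 16 attention heads, 24 layers) are those of the GPT-2 medium architecture, roughly 355M parameters, replacing the GPT-2 small (124M) settings; the renamed GPT_CONFIG_355M dictionary reflects that. The sketch below pulls the loading flow this diff implies into one runnable-style outline. It assumes the GPTModel class defined earlier in the repo and assumes review_classifier.pth stores a plain state_dict, so treat it as an illustration rather than the exact contents of app.py.

    # Illustrative sketch only (assumes GPTModel from the repo's earlier
    # parts and a checkpoint saved as a plain state_dict).
    from pathlib import Path

    import tiktoken
    import torch

    GPT_CONFIG_355M = {
        "vocab_size": 50257,     # Vocabulary size
        "context_length": 1024,  # Context length
        "emb_dim": 1024,         # Embedding dimension
        "n_heads": 16,           # Number of attention heads
        "n_layers": 24,          # Number of layers
        "drop_rate": 0.1,        # Dropout rate
        "qkv_bias": True,        # Query-key-value bias
    }

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer = tiktoken.get_encoding("gpt2")

    model = GPTModel(GPT_CONFIG_355M)  # class defined earlier in the repo

    # Replace the language-modeling head with a 2-class output head *before*
    # loading weights, so the checkpoint's keys and shapes match.
    model.out_head = torch.nn.Linear(
        in_features=GPT_CONFIG_355M["emb_dim"], out_features=2
    )

    model_path = Path("../..") / "gpt2" / "review_classifier.pth"
    checkpoint = torch.load(model_path, map_location=device, weights_only=True)
    model.load_state_dict(checkpoint)  # assumption: file holds a raw state_dict
    model.to(device)
    model.eval()

Swapping out_head before load_state_dict matters because the checkpoint was saved from the classifier variant: the two-class head must already exist for the parameter names and shapes to line up.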
15,000 changes: 15,000 additions & 0 deletions part_6/01_main-code/english_train.json

(Large diff not rendered.)

6 changes: 3 additions & 3 deletions part_6/01_main-code/gpt_instruction_finetuning.py
@@ -1,5 +1,5 @@
 # Copyright (c) The SkillPedia under Apache License 2.0 (see LICENSE.txt).
-# Source for "Build a Large Language Model From Scratch"
+# Source for "Building a Large Language Model From Scratch"
 #
 # Code: https://github.com/Sangwan70/Building-an-LLM-From-Scratch
 #
@@ -166,8 +166,8 @@ def main(test_mode=False):
     #######################################
     # Download and prepare dataset
     #######################################
-    file_path = "instruction-data.json"
-    url = "https://raw.githubusercontent.com/Sangwan70/Building-an-LLM-From-Scratch/refs/heads/main/part_6/01_main-code/instruction-data.json"
+    file_path = "instruction-data-with-response.json"
+    url = "https://raw.githubusercontent.com/Sangwan70/Building-an-LLM-From-Scratch/refs/heads/main/part_6/01_main-code/instruction-data-with-response.json"
     data = download_and_load_file(file_path, url)

     train_portion = int(len(data) * 0.85)  # 85% for training
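The URL swap above is consumed by download_and_load_file, which fetches the JSON dataset once and caches it locally. Below is a minimal sketch of such a helper, consistent with how it is called in the hunk; the repo's actual implementation may differ in details such as encoding and error handling.

    import json
    import os
    import urllib.request

    def download_and_load_file(file_path, url):
        """Download the JSON dataset if absent, then load it from disk."""
        if not os.path.exists(file_path):
            with urllib.request.urlopen(url) as response:
                text_data = response.read().decode("utf-8")
            with open(file_path, "w", encoding="utf-8") as file:
                file.write(text_data)
        with open(file_path, "r", encoding="utf-8") as file:
            return json.load(file)

With train_portion at 85%, the remaining 15% of data is presumably divided between validation and test splits further down in main(); the exact ratio is not visible in this diff.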
3,120 changes: 2,460 additions & 660 deletions part_6/01_main-code/instruction-data-with-response.json

(Large diff not rendered.)

(Diffs for the remaining changed files did not load.)

0 comments on commit c4b6c0f
