diff --git a/data/datasets/bart_searchgpt_wiki_nlp_augment/3_10k_bart_trial.py b/data/datasets/bart_searchgpt_wiki_nlp_augment/3_10k_bart_trial.py
index 12d539a2b7..d8afe37968 100644
--- a/data/datasets/bart_searchgpt_wiki_nlp_augment/3_10k_bart_trial.py
+++ b/data/datasets/bart_searchgpt_wiki_nlp_augment/3_10k_bart_trial.py
@@ -12,6 +12,7 @@ def num_tokens_from_string(string: str) -> int:
 
 
 if __name__ == "__main__":
+    # Use the `dtype` parameter of `pd.read_csv`.
     sampled_df = pd.read_csv("wiki_qa_bart_10000row_input.csv")
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     print(device)
diff --git a/data/datasets/bart_searchgpt_wiki_nlp_augment/4_convert_to_oa_format.py b/data/datasets/bart_searchgpt_wiki_nlp_augment/4_convert_to_oa_format.py
index b59bde8091..a517bef579 100644
--- a/data/datasets/bart_searchgpt_wiki_nlp_augment/4_convert_to_oa_format.py
+++ b/data/datasets/bart_searchgpt_wiki_nlp_augment/4_convert_to_oa_format.py
@@ -3,6 +3,7 @@ import pandas as pd
 
 
 if __name__ == "__main__":
+    # Use the `dtype` parameter of `pd.read_csv`.
     raw_df = pd.read_csv(r"...\wiki_qa_bart_10000row.csv")
     # print(raw_df.iloc[0])
     # print(raw_df.columns)
diff --git a/data/datasets/mt_note_generation/prepare.py b/data/datasets/mt_note_generation/prepare.py
index ff5fcdc5a1..4021c587f9 100644
--- a/data/datasets/mt_note_generation/prepare.py
+++ b/data/datasets/mt_note_generation/prepare.py
@@ -63,6 +63,7 @@ def main(output_dir: str = "data"):
     """Download and prepare the dataset for use."""
     os.makedirs(output_dir, exist_ok=True)
     kaggle.api.dataset_download_files("tboyle10/medicaltranscriptions", "data", unzip=True)
+    # Use the `dtype` parameter of `pd.read_csv`.
     mt_samples = preprocess(pd.read_csv("data/mtsamples.csv"))
     conversations = get_conversations(mt_samples)
     random.shuffle(conversations)
diff --git a/data/datasets/poetry_instruction/prepare.py b/data/datasets/poetry_instruction/prepare.py
index 9a4718e2da..5667e4ba88 100644
--- a/data/datasets/poetry_instruction/prepare.py
+++ b/data/datasets/poetry_instruction/prepare.py
@@ -14,6 +14,7 @@
 
 # Read the CSV file into a pandas dataframe
 csv_file = os.path.join(download_path, "PoetryFoundationData.csv")
+# Use the `dtype` parameter of `pd.read_csv`.
 df = pd.read_csv(csv_file)
 
 # The data in the CSV file is not formatted correctly, so we need to clean it up.
diff --git a/data/datasets/zhihu-kol/convert_parquet.py b/data/datasets/zhihu-kol/convert_parquet.py
index 5d8c7b6971..a89354a7a9 100644
--- a/data/datasets/zhihu-kol/convert_parquet.py
+++ b/data/datasets/zhihu-kol/convert_parquet.py
@@ -43,6 +43,7 @@ def reformat_csv_to_openassistant(df: pd.DataFrame) -> pd.DataFrame:
 if __name__ == "__main__":
     input_csv = "zhihu.csv"
     # Create a pandas dataframe from your dataset file(s)
+    # Use the `dtype` parameter of `pd.read_csv`.
     df = pd.read_csv(input_csv)  # or any other way
     df = reformat_csv_to_openassistant(df)
     # Save the file in the Parquet format
diff --git a/scripts/data_augment/data_augment.py b/scripts/data_augment/data_augment.py
index 79072006ce..1e9e575fc2 100644
--- a/scripts/data_augment/data_augment.py
+++ b/scripts/data_augment/data_augment.py
@@ -458,6 +458,7 @@ def parse_arguments():
 
 
 def read_data(args):
+    # Use the `dtype` parameter of `pd.read_csv`.
     files = pd.read_csv(args.dataset, sep=",", header=None, names=["file"])
     files = files["file"].tolist()
     data = []
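The comment added at each call site above is the same recommendation. As a minimal sketch of what it might look like in practice, assuming hypothetical column names (the real CSV headers are not shown in this diff):

    import pandas as pd

    # An explicit dtype mapping keeps pandas from inferring types chunk by
    # chunk, which avoids mixed-type object columns and DtypeWarning noise
    # on large files. "question" and "answer" are placeholder column names.
    df = pd.read_csv(
        "wiki_qa_bart_10000row_input.csv",
        dtype={"question": "string", "answer": "string"},
    )

Columns left out of the mapping still fall back to normal type inference, so the mapping only needs to cover the columns whose types matter downstream.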