From d6348955ffa8000f0a61d4f37a836ce687791d10 Mon Sep 17 00:00:00 2001
From: Rens van de schoot <a.g.j.vandeschoot@uu.nl>
Date: Sun, 6 Oct 2024 17:07:48 +0200
Subject: [PATCH 1/6] Rename dataset_with_priors into dataset_custom_priors

---
 asreviewcontrib/makita/template_prior.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/asreviewcontrib/makita/template_prior.py b/asreviewcontrib/makita/template_prior.py
index bb45683..b99e3f5 100644
--- a/asreviewcontrib/makita/template_prior.py
+++ b/asreviewcontrib/makita/template_prior.py
@@ -95,10 +95,10 @@ def get_template_specific_params(self, params):
         )
         n_runs = self.n_runs if self.n_runs is not None else 1
 
-        # Check if at least one dataset with prior knowledge is present
+        # Check if at least one dataset with custom prior knowledge is present
         if self._prior_dataset_count == 0:
             raise ValueError(
-                "At least one dataset with prior knowledge (prefix 'prior_' or \
+                "At least one dataset with custom prior knowledge (prefix 'prior_' or \
                     'priors_') is required."
             )
 
@@ -108,8 +108,8 @@ def get_template_specific_params(self, params):
                 "At least one dataset without prior knowledge is required."
             )
 
-        # Print the number of datasets with and without prior knowledge
-        print(f"\nTotal datasets with prior knowledge: {self._prior_dataset_count}")
+        # Print the number of datasets with custom and without prior knowledge
+        print(f"\nTotal datasets with custom prior knowledge: {self._prior_dataset_count}")
         print(
             f"Total datasets without prior knowledge: {self._non_prior_dataset_count}"
         )
@@ -118,8 +118,8 @@ def get_template_specific_params(self, params):
         generated_folder = Path("generated_data")
         generated_folder.mkdir(parents=True, exist_ok=True)
 
-        # Set file paths for datasets with and without prior knowledge
-        filepath_with_priors = generated_folder / "dataset_with_priors.csv"
+        # Set file paths for datasets with custom records for prior knowledge and without pre-set prior knowledge from which a minimal training set of 2 will be selected
+        filepath_with_priors = generated_folder / "dataset_custom_priors.csv"
         filepath_without_priors = generated_folder / "dataset_without_priors.csv"
 
         # Combine all datasets into one DataFrame and remove rows where label is -1
@@ -136,7 +136,7 @@ def get_template_specific_params(self, params):
             combined_dataset["makita_priors"] == 0
         ].shape[0]
 
-        # Print the number of rows with and without prior knowledge
+        # Print the number of rows with and without custom prior knowledge
         print(f"Total rows of prior knowledge: {total_rows_with_priors}")
         print(f"Total rows of non-prior knowledge: {total_rows_without_priors}")
 
@@ -150,7 +150,7 @@ def get_template_specific_params(self, params):
             index_label='record_id'
         )
 
-        # Create a string of indices for rows with prior knowledge
+        # Create a string of indices for rows with custom prior knowledge
         prior_idx_list = combined_dataset[
             combined_dataset["makita_priors"] == 1
         ].index.tolist()

From 3812269590a2be3ebaa8a14eb45bd534983c8304 Mon Sep 17 00:00:00 2001
From: Rens van de schoot <a.g.j.vandeschoot@uu.nl>
Date: Sun, 6 Oct 2024 17:08:37 +0200
Subject: [PATCH 2/6] Adjust filename in template to custom_priors

---
 asreviewcontrib/makita/templates/template_prior.txt.template | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/asreviewcontrib/makita/templates/template_prior.txt.template b/asreviewcontrib/makita/templates/template_prior.txt.template
index f404367..f7800b2 100644
--- a/asreviewcontrib/makita/templates/template_prior.txt.template
+++ b/asreviewcontrib/makita/templates/template_prior.txt.template
@@ -39,8 +39,8 @@ python -m asreview wordcloud {{ filepath_without_priors }} -o {{ output_folder }
 {% endif %}
 
 {% for run in range(n_runs) %}
-python -m asreview simulate {{ filepath_with_priors }} -s {{ output_folder }}/simulation/state_files/sim_with_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview --seed {{ model_seed + run }} -m {{ classifier }} -e {{ feature_extractor }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }} --prior_idx {{ prior_idx }}
-python -m asreview metrics {{ output_folder }}/simulation/state_files/sim_with_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview -o {{ output_folder }}/simulation/metrics/metrics_sim_with_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.json
+python -m asreview simulate {{ filepath_with_priors }} -s {{ output_folder }}/simulation/state_files/sim_custom_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview --seed {{ model_seed + run }} -m {{ classifier }} -e {{ feature_extractor }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }} --prior_idx {{ prior_idx }}
+python -m asreview metrics {{ output_folder }}/simulation/state_files/sim_custom_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview -o {{ output_folder }}/simulation/metrics/metrics_sim_custom_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.json
 
 python -m asreview simulate {{ filepath_without_priors }} -s {{ output_folder }}/simulation/state_files/sim_without_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview --init_seed {{ init_seed + run }} --seed {{ model_seed + run }} -m {{ classifier }} -e {{ feature_extractor }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }}
 python -m asreview metrics {{ output_folder }}/simulation/state_files/sim_without_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview -o {{ output_folder }}/simulation/metrics/metrics_sim_without_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.json

From 575462f7f69d1fde54670e6a9c5d382fb1f960c6 Mon Sep 17 00:00:00 2001
From: Rens van de schoot <a.g.j.vandeschoot@uu.nl>
Date: Sun, 6 Oct 2024 17:09:33 +0200
Subject: [PATCH 3/6] Rename output file sim_without_priors to sim_minimal_priors

---
 asreviewcontrib/makita/templates/template_prior.txt.template | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/asreviewcontrib/makita/templates/template_prior.txt.template b/asreviewcontrib/makita/templates/template_prior.txt.template
index f7800b2..c295f28 100644
--- a/asreviewcontrib/makita/templates/template_prior.txt.template
+++ b/asreviewcontrib/makita/templates/template_prior.txt.template
@@ -42,8 +42,8 @@ python -m asreview wordcloud {{ filepath_without_priors }} -o {{ output_folder }
 python -m asreview simulate {{ filepath_with_priors }} -s {{ output_folder }}/simulation/state_files/sim_custom_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview --seed {{ model_seed + run }} -m {{ classifier }} -e {{ feature_extractor }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }} --prior_idx {{ prior_idx }}
 python -m asreview metrics {{ output_folder }}/simulation/state_files/sim_custom_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview -o {{ output_folder }}/simulation/metrics/metrics_sim_custom_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.json
 
-python -m asreview simulate {{ filepath_without_priors }} -s {{ output_folder }}/simulation/state_files/sim_without_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview --init_seed {{ init_seed + run }} --seed {{ model_seed + run }} -m {{ classifier }} -e {{ feature_extractor }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }}
-python -m asreview metrics {{ output_folder }}/simulation/state_files/sim_without_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview -o {{ output_folder }}/simulation/metrics/metrics_sim_without_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.json
+python -m asreview simulate {{ filepath_without_priors }} -s {{ output_folder }}/simulation/state_files/sim_minimal_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview --init_seed {{ init_seed + run }} --seed {{ model_seed + run }} -m {{ classifier }} -e {{ feature_extractor }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }}
+python -m asreview metrics {{ output_folder }}/simulation/state_files/sim_minimal_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview -o {{ output_folder }}/simulation/metrics/metrics_sim_minimal_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.json
 
 {% endfor %}
 # Generate plot and tables for dataset

From 99cf6778d87a92c90189d7d67829e396e6c98bea Mon Sep 17 00:00:00 2001
From: Rens van de schoot <a.g.j.vandeschoot@uu.nl>
Date: Sun, 6 Oct 2024 17:23:59 +0200
Subject: [PATCH 4/6] linting

---
 asreviewcontrib/makita/template_prior.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/asreviewcontrib/makita/template_prior.py b/asreviewcontrib/makita/template_prior.py
index b99e3f5..40ce357 100644
--- a/asreviewcontrib/makita/template_prior.py
+++ b/asreviewcontrib/makita/template_prior.py
@@ -109,7 +109,8 @@ def get_template_specific_params(self, params):
             )
 
         # Print the number of datasets with custom and without prior knowledge
-        print(f"\nTotal datasets with custom prior knowledge: {self._prior_dataset_count}")
+        print(
+            f"\nTotal datasets with custom prior knowledge: {self._prior_dataset_count}")
         print(
             f"Total datasets without prior knowledge: {self._non_prior_dataset_count}"
         )
@@ -118,7 +119,9 @@ def get_template_specific_params(self, params):
         generated_folder = Path("generated_data")
         generated_folder.mkdir(parents=True, exist_ok=True)
 
-        # Set file paths for datasets with custom records for prior knowledge and without pre-set prior knowledge from which a minimal training set of 2 will be selected
+        # Set file paths for two datasets: one containing the custom
+        # prior-knowledge records, and one without pre-set priors from
+        # which a minimal training set of 2 records will be selected
         filepath_with_priors = generated_folder / "dataset_custom_priors.csv"
         filepath_without_priors = generated_folder / "dataset_without_priors.csv"
 

From 6bbea2864b8537275f6a26df83aa8285345a858b Mon Sep 17 00:00:00 2001
From: Rens van de schoot <a.g.j.vandeschoot@uu.nl>
Date: Sun, 6 Oct 2024 17:27:52 +0200
Subject: [PATCH 5/6] Adjust readme with new terminology

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index aa906e9..4d4d0d0 100644
--- a/README.md
+++ b/README.md
@@ -209,9 +209,9 @@ asreview makita template multimodel --classifiers logistic nb --feature_extracto
 
 command: `prior`
 
-The prior template evaluates how large amounts of prior knowledge might affect simulation performance. It processes two types of data in the data folder: labeled dataset(s) to be simulated and labeled dataset(s) to be used as prior knowledge. The filename(s) of the dataset(s) containing the prior knowledge should use the naming prefix `prior_[dataset_name]`. 
+The prior template evaluates how a set of custom prior knowledge might affect simulation performance. It processes two types of data in the data folder: labeled dataset(s) to be simulated and labeled dataset(s) to be used as prior knowledge. The filename(s) of the dataset(s) containing the custom prior knowledge should use the naming prefix `prior_[dataset_name]`.
 
-The template runs two simulations: the first simulation uses all records from the `prior_` dataset(s) as prior knowledge, and the second uses a 1+1 randomly chosen set of prior knowledge from the non-prior knowledge dataset. Both runs simulate performance on the combined non-prior dataset(s).
+The template runs two simulations: the first simulation uses all records from the `prior_` dataset(s) as prior knowledge, and the second uses a 1+1 randomly chosen set of prior knowledge from the non-prior knowledge dataset as a minimal training set. Both runs simulate performance on the combined non-prior dataset(s).
 
 Running this template creates a `generated_data` folder. This folder contains two datasets; `dataset_with_priors.csv` and `dataset_without_priors.csv`. The simulations specified in the generated jobs file will use these datasets for their simulations.
 

From 6d617d557955cb6122cea3c164f6de85af370323 Mon Sep 17 00:00:00 2001
From: Rens van de schoot <a.g.j.vandeschoot@uu.nl>
Date: Tue, 8 Oct 2024 14:39:13 +0200
Subject: [PATCH 6/6] Shorten line

---
 asreviewcontrib/makita/template_prior.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/asreviewcontrib/makita/template_prior.py b/asreviewcontrib/makita/template_prior.py
index 40ce357..9466b42 100644
--- a/asreviewcontrib/makita/template_prior.py
+++ b/asreviewcontrib/makita/template_prior.py
@@ -110,9 +110,9 @@ def get_template_specific_params(self, params):
 
         # Print the number of datasets with custom and without prior knowledge
         print(
-            f"\nTotal datasets with custom prior knowledge: {self._prior_dataset_count}")
+            f"\nDatasets with custom prior knowledge: {self._prior_dataset_count}")
         print(
-            f"Total datasets without prior knowledge: {self._non_prior_dataset_count}"
+            f"Datasets without prior knowledge: {self._non_prior_dataset_count}"
         )
 
         # Create a directory for generated data if it doesn't already exist