From 43c5024a00b510f54f264109544ccd7fed197925 Mon Sep 17 00:00:00 2001
From: ameknas <abdallahmeknas@gmail.com>
Date: Tue, 26 Mar 2024 15:25:54 -0400
Subject: [PATCH 01/26] test files

---
 tests/__init__.py                        |  0
 tests/test_GAS_assign.py                 | 44 ++++++++++++
 tests/test_GAS_matrix_splitter.py        | 62 +++++++++++++++++
 tests/test_GAS_mcluster.py               | 33 +++++++++
 tests/test_GAS_multi_level_clustering.py | 37 +++++++++++
 tests/test_GAS_utils.py                  | 85 ++++++++++++++++++++++++
 6 files changed, 261 insertions(+)
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_GAS_assign.py
 create mode 100644 tests/test_GAS_matrix_splitter.py
 create mode 100644 tests/test_GAS_mcluster.py
 create mode 100644 tests/test_GAS_multi_level_clustering.py
 create mode 100644 tests/test_GAS_utils.py

diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_GAS_assign.py b/tests/test_GAS_assign.py
new file mode 100644
index 0000000..0d5718c
--- /dev/null
+++ b/tests/test_GAS_assign.py
@@ -0,0 +1,44 @@
+import pytest
+import pandas as pd
+from tempfile import NamedTemporaryFile
+import os
+from genomic_address_service.classes.assign import assign
+
+@pytest.fixture
+def mock_dist_file():
+    content = """query_id\tref_id\tdist
+q1\tr1\t0.1
+q2\tr2\t0.2
+"""
+    with NamedTemporaryFile('w+', delete=False) as tmp:
+        tmp.write(content)
+        tmp.flush()
+        yield tmp.name
+        os.unlink(tmp.name)
+
+@pytest.fixture
+def mock_membership_file():
+    content = """id\tthreshold_0.1\tthreshold_0.2
+r1\t1\t1
+r2\t2\t1
+"""
+    with NamedTemporaryFile('w+', delete=False) as tmp:
+        tmp.write(content)
+        tmp.flush()
+        yield tmp.name
+        os.unlink(tmp.name)
+
+def test_initialization(mock_dist_file, mock_membership_file):
+    threshold_map = {"threshold_0.1": 0.1, "threshold_0.2": 0.2}
+    a = assign(dist_file=mock_dist_file, membership_file=mock_membership_file, threshold_map=threshold_map, linkage_method='single')
+    
+    assert a.status, "Initialization failed, check error_msgs for details"
+    assert not a.error_msgs, f"Unexpected errors during initialization: {a.error_msgs}"
+    assert isinstance(a.query_df, pd.DataFrame), "Query DataFrame not initialized properly"
+    assert isinstance(a.memberships_df, pd.DataFrame), "Memberships DataFrame"
+
+def test_check_membership_columns(mock_dist_file, mock_membership_file):
+    threshold_map = {"threshold_0.1": 0.1, "threshold_0.2": 0.2}
+    a = assign(dist_file=mock_dist_file, membership_file=mock_membership_file, threshold_map=threshold_map, linkage_method='single')
+    cols = ['threshold_0.1', 'threshold_0.2']
+    assert a.check_membership_columns(cols), "Membership column check failed for valid columns"
\ No newline at end of file
diff --git a/tests/test_GAS_matrix_splitter.py b/tests/test_GAS_matrix_splitter.py
new file mode 100644
index 0000000..c276e0d
--- /dev/null
+++ b/tests/test_GAS_matrix_splitter.py
@@ -0,0 +1,62 @@
+import pytest
+import os
+from tempfile import NamedTemporaryFile, TemporaryDirectory
+from genomic_address_service.classes.matrix_splitter import matrix_splitter
+
+@pytest.fixture
+def mock_matrix_file():
+    content = "Header\nRow1\t0.1\t0.2\nRow2\t0.2\t0.1\n"
+    with NamedTemporaryFile('w+', delete=False) as tmp:
+        tmp.write(content)
+        tmp.flush()
+    yield tmp.name
+    os.unlink(tmp.name)
+
+@pytest.fixture
+def big_mock_matrix_file():
+    content = "Header\nRow1\t0.1\t0.2\t0.3\t0.4\t0.5\nRow2\t0.6\t0.7\t0.8\t0.9\t1.0\nRow3\t1.1\t1.2\t1.3\t1.4\t1.5\nRow4\t1.6\t1.7\t1.8\t1.9\t2.0\nRow5\t2.1\t2.2\t2.3\t2.4\t2.5\n"
+    with NamedTemporaryFile('w+', delete=False) as tmp:
+        tmp.write(content)
+        tmp.flush()
+    yield tmp.name
+    os.unlink(tmp.name)
+
+@pytest.fixture
+def output_directory():
+    with TemporaryDirectory() as tmpdir:
+        yield tmpdir
+
+
+def test_initialization(mock_matrix_file, output_directory):
+    batch_size = 7
+    ms = matrix_splitter(mock_matrix_file, output_directory, batch_size)
+    assert ms.file_path == mock_matrix_file
+    assert ms.out_path == output_directory
+    assert ms.batch_size == batch_size
+    assert ms.is_ok == True  # Assuming the mock file and output directory meet the requirements
+
+def test_get_file_length(mock_matrix_file):
+    dummy_out_path = "dummy_out_path"
+    dummy_batch_size = 1
+    ms = matrix_splitter(file_path=mock_matrix_file, 
+                         out_path=dummy_out_path, 
+                         batch_size=dummy_batch_size)
+    expected_line_count = 10
+    assert ms.get_file_length() == expected_line_count, "get_file_length did not return the expected number of lines"
+
+def test_prep_batch_ranges(big_mock_matrix_file, output_directory):
+    batch_size = 1
+    ms = matrix_splitter(big_mock_matrix_file, output_directory, batch_size)
+    ms.prep_batch_ranges()
+    assert ms.num_batches == 2 
+    assert len(ms.ranges) == ms.num_batches
+
+@pytest.mark.parametrize("method_name", ["parse_distance_matrix_bins", "parse_distance_matrix_partitions"])
+def test_parse_methods(big_mock_matrix_file, output_directory, method_name):
+    batch_size = 1
+    ms = matrix_splitter(big_mock_matrix_file, output_directory, batch_size)
+    ms.prep_batch_ranges()
+    parse_method = getattr(ms, method_name)
+    parse_method()
+    for i in range(ms.num_batches):
+        assert os.path.exists(os.path.join(ms.out_path, f"{ms.prefix}-{i}.matrix"))
diff --git a/tests/test_GAS_mcluster.py b/tests/test_GAS_mcluster.py
new file mode 100644
index 0000000..89a4069
--- /dev/null
+++ b/tests/test_GAS_mcluster.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+import os
+import tempfile
+from genomic_address_service.mcluster import write_clusters  # Adjust the import path based on your project structure
+
+def test_write_clusters():
+    # Create mock cluster data
+    mock_clusters = {
+        '1': ['1', '1', '1'],
+        '2': ['1', '1', '2'],
+        '3': ['1', '2', '3']
+    }
+    num_thresholds = 3
+    delimiter = "."
+
+    # Create a temporary file
+    temp_file = tempfile.NamedTemporaryFile(delete=False)
+    try:
+        # Write mock clusters to the temporary file
+        write_clusters(mock_clusters, num_thresholds, temp_file.name, delimiter)
+
+        # Verify the contents of the file
+        with open(temp_file.name, 'r') as file:
+            lines = file.readlines()
+            # Check the header
+            assert lines[0].strip() == "id\taddress\tlevel_1\tlevel_2\tlevel_3"
+            # Check the first line of data
+            assert lines[1].strip() == "1\t1.1.1\t1\t1\t1"
+            assert lines[2].strip() == "2\t1.1.2\t1\t1\t2"
+            assert lines[3].strip() == "3\t1.2.3\t1\t2\t3"
+    finally:
+        # Clean up - delete the temporary file
+        os.remove(temp_file.name)
\ No newline at end of file
diff --git a/tests/test_GAS_multi_level_clustering.py b/tests/test_GAS_multi_level_clustering.py
new file mode 100644
index 0000000..bbbc5ee
--- /dev/null
+++ b/tests/test_GAS_multi_level_clustering.py
@@ -0,0 +1,37 @@
+import pytest
+from genomic_address_service.classes.multi_level_clustering import multi_level_clustering
+import tempfile
+import os
+
+@pytest.fixture
+def sample_distance_matrix():
+    content = """Header\tLabel1\tLabel2\tLabel3
+Label1\t0.0\t0.1\t0.2
+Label2\t0.1\t0.0\t0.3
+Label3\t0.2\t0.3\t0.0
+"""
+    with tempfile.NamedTemporaryFile('w+', delete=False) as tmp:
+        tmp.write(content)
+        tmp.flush()
+        yield tmp.name
+        os.unlink(tmp.name)
+
+def test_initialization(sample_distance_matrix):
+    thresholds = [0.15]
+    mlc = multi_level_clustering(dist_mat_file=sample_distance_matrix, thresholds=thresholds, method="single")
+    assert len(mlc.labels) == 3  # Expecting 3 labels based on the sample matrix
+    assert mlc.linkage is not None  # Linkage matrix should be created
+    assert 'Label1' in mlc.cluster_memberships  # Initial membership should be populated
+
+def test_assign_clusters(sample_distance_matrix):
+    thresholds = [0.15]
+    mlc = multi_level_clustering(dist_mat_file=sample_distance_matrix, thresholds=thresholds, method="single")
+    mlc.assign_clusters()
+    # This assertion may need to be adjusted based on expected cluster assignments
+    # I gussed 2?
+    assert all(len(clusters) == 2 for clusters in mlc.cluster_memberships.values())
+
+def test_newick_string(sample_distance_matrix):
+    thresholds = [0.15]
+    mlc = multi_level_clustering(dist_mat_file=sample_distance_matrix, thresholds=thresholds, method="single")
+    assert mlc.newick.endswith(";")  # Newick strings should end with a semicolon
\ No newline at end of file
diff --git a/tests/test_GAS_utils.py b/tests/test_GAS_utils.py
new file mode 100644
index 0000000..2264e73
--- /dev/null
+++ b/tests/test_GAS_utils.py
@@ -0,0 +1,85 @@
+import pytest
+import os
+import json
+import pandas as pd
+import tempfile
+from genomic_address_service.utils import (
+    get_file_length, get_file_header, get_file_footer,
+    is_matrix_valid, is_file_ok, format_threshold_map,
+    write_threshold_map, write_cluster_assignments,
+    init_threshold_map
+)
+
+def test_get_file_length():
+    with tempfile.NamedTemporaryFile(mode='w+', delete=False) as tmpfile:
+        tmpfile.write("Line 1\nLine 2\nLine 3\n")
+    assert get_file_length(tmpfile.name) == 3
+    os.unlink(tmpfile.name)
+
+def test_get_file_header():
+    with tempfile.NamedTemporaryFile(mode='w+', delete=False) as tmpfile:
+        tmpfile.write("Header\nLine 1\nLine 2")
+    assert get_file_header(tmpfile.name).strip() == "Header"
+    os.unlink(tmpfile.name)
+
+def test_get_file_footer():
+    with tempfile.NamedTemporaryFile(mode='w+', delete=False) as tmpfile:
+        tmpfile.write("Line 1\nLine 2\nFooter")
+    assert get_file_footer(tmpfile.name).strip() == "Footer"
+    os.unlink(tmpfile.name)
+
+def test_is_matrix_valid():
+    with tempfile.NamedTemporaryFile(mode='w+', delete=False) as tmpfile:
+        tmpfile.write("Header1\tHeader2\nValue1\tValue2\nValue3\tValue4")
+    assert is_matrix_valid(tmpfile.name) == True
+    os.unlink(tmpfile.name)
+
+def test_is_file_ok():
+    with tempfile.NamedTemporaryFile(mode='w+', delete=False) as tmpfile:
+        tmpfile.write("Some content")
+    assert is_file_ok(tmpfile.name) == False
+    os.unlink(tmpfile.name)
+
+def test_format_threshold_map():
+    thresholds = [0.1, 0.2, 0.3]
+    expected_output = {'level_1': 0.1, 'level_2': 0.2, 'level_3': 0.3}
+    assert format_threshold_map(thresholds) == expected_output
+
+def test_write_threshold_map():
+    data = {'level_1': 0.1, 'level_2': 0.2, 'level_3': 0.3}
+    with tempfile.NamedTemporaryFile(mode='w+', delete=False) as tmpfile:
+        write_threshold_map(data, tmpfile.name)
+        tmpfile.seek(0)
+        content = json.load(tmpfile)
+    assert content == data
+    os.unlink(tmpfile.name)
+
+@pytest.mark.parametrize("outfmt", ["text", "parquet"])
+def test_write_cluster_assignments(outfmt):
+    memberships = {'1': 'A.B.C', '2': 'D.E.F'}
+    threshold_map = {'level_1': 'A', 'level_2': 'B', 'level_3': 'C'}
+    with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.txt' if outfmt == 'text' else '.parquet') as tmpfile:
+        write_cluster_assignments(tmpfile.name, memberships, threshold_map, outfmt)
+        if outfmt == 'text':
+            tmpfile.seek(0)
+            df = pd.read_csv(tmpfile.name, sep="\t")
+            assert 'id' in df.columns and 'address' in df.columns
+            assert df.iloc[0]['address'] == 'A.B.C'
+            assert df.iloc[1]['address'] == 'D.E.F'
+        else:
+            df = pd.read_parquet(tmpfile.name)
+            assert 'id' in df.columns and 'address' in df.columns
+            assert df.iloc[0]['address'] == 'A.B.C'
+            assert df.iloc[1]['address'] == 'D.E.F'
+    os.unlink(tmpfile.name)
+
+def test_init_threshold_map():
+    with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.txt') as tmpfile:
+        tmpfile.write("id\tsample_id\tST\tnomenclature\taddress\tlevel_1\tlevel_2\tlevel_3\n")
+        tmpfile.write("1\tSample1\tST1\tNom1\tAddr1\t0.1\t0.2\t0.3\n")
+        tmpfile.seek(0)
+        thresholds = [0.1, 0.2, 0.3]
+        result = init_threshold_map(tmpfile.name, thresholds)
+        expected_result = {'level_1': 0.1, 'level_2': 0.2, 'level_3': 0.3}
+    assert result == expected_result
+    os.unlink(tmpfile.name)

From e738369da241087ab6bb8f6be43149df5324d97b Mon Sep 17 00:00:00 2001
From: ameknas <abdallahmeknas@gmail.com>
Date: Thu, 28 Mar 2024 10:06:31 -0400
Subject: [PATCH 02/26] added git ignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bee8a64
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+__pycache__

From d457c8051a42c9ae5b5d5447517f452704cecfb4 Mon Sep 17 00:00:00 2001
From: ameknas <abdallahmeknas@gmail.com>
Date: Thu, 28 Mar 2024 10:15:02 -0400
Subject: [PATCH 03/26] Updated git ignore

---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index bee8a64..a28eca9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
 __pycache__
+test_GAS_assign.py
+test_GAS_matrix_splitter.py
\ No newline at end of file

From b1be5ee78fd6409e5c1b1ce201328140daaa2dc2 Mon Sep 17 00:00:00 2001
From: kylacochrane <Kyla.Cochrane@phac-aspc.gc.ca>
Date: Thu, 7 Nov 2024 09:50:20 -0500
Subject: [PATCH 04/26] Fix AttributeError in address assignment by string
 conversion

---
 genomic_address_service/classes/assign.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genomic_address_service/classes/assign.py b/genomic_address_service/classes/assign.py
index c1782d4..216eb88 100644
--- a/genomic_address_service/classes/assign.py
+++ b/genomic_address_service/classes/assign.py
@@ -85,7 +85,7 @@ def format_df(self,data,delim='.'):
         self.error_samples = []
         membership = {}
         for sample_id in data:
-            address = data[sample_id].split(delim)
+            address = str(data[sample_id].split(delim))
             if len(address) != num_thresholds:
                 self.error_samples.append(sample_id)
                 continue

From 729dbdf8797097fe772bcb73985399dcd3d71ced Mon Sep 17 00:00:00 2001
From: kylacochrane <Kyla.Cochrane@phac-aspc.gc.ca>
Date: Thu, 7 Nov 2024 11:33:24 -0500
Subject: [PATCH 05/26] Fix AttributeError in address assignment by string
 conversion

---
 genomic_address_service/classes/assign.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genomic_address_service/classes/assign.py b/genomic_address_service/classes/assign.py
index 216eb88..30977ff 100644
--- a/genomic_address_service/classes/assign.py
+++ b/genomic_address_service/classes/assign.py
@@ -85,7 +85,7 @@ def format_df(self,data,delim='.'):
         self.error_samples = []
         membership = {}
         for sample_id in data:
-            address = str(data[sample_id].split(delim))
+            address = str(data[sample_id]).split(delim)
             if len(address) != num_thresholds:
                 self.error_samples.append(sample_id)
                 continue

From e69d5faa4c19aa1b7faeb7933bf3fca5eb607345 Mon Sep 17 00:00:00 2001
From: kylacochrane <Kyla.Cochrane@phac-aspc.gc.ca>
Date: Thu, 7 Nov 2024 16:40:46 -0500
Subject: [PATCH 06/26] Updated CHANGELOG

---
 CHANGELOG.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a45b512..73d52aa 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,12 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.1.3] - 2024/11/..
+
+### `Fixed`
+
+- Converted `data[sample_id]` to a string in the `format_df` function with `assign.py` to prevent `AttributeErrors` when non-string values are in the genomic address.
+
 ## v1.0dev - [date]
 
 Initial release of phac-nml/genomic_address_service

From a5865b7e04133afd9fc5b92b5b6cbc3e0f45fceb Mon Sep 17 00:00:00 2001
From: kylacochrane <Kyla.Cochrane@phac-aspc.gc.ca>
Date: Fri, 8 Nov 2024 15:13:05 -0500
Subject: [PATCH 07/26] Testing changes to function

---
 genomic_address_service/classes/multi_level_clustering.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genomic_address_service/classes/multi_level_clustering.py b/genomic_address_service/classes/multi_level_clustering.py
index 8d8a65a..e786cec 100644
--- a/genomic_address_service/classes/multi_level_clustering.py
+++ b/genomic_address_service/classes/multi_level_clustering.py
@@ -54,7 +54,7 @@ def buildNewick(self,node, newick, parentdist, leaf_names):
             return "%s:%f%s" % (leaf_names[node.id], parentdist - node.dist, newick)
         else:
             if len(newick) > 0:
-                newick = f"):{(parentdist - node.dist) / 2}{newick}"
+                newick = f"):{parentdist - node.dist}{newick}"
             else:
                 newick = ");"
             newick = self.buildNewick(node.get_left(), newick, node.dist, leaf_names)

From a20b15751ecbee0770a563e5c293c00ef78cfb39 Mon Sep 17 00:00:00 2001
From: kylacochrane <Kyla.Cochrane@phac-aspc.gc.ca>
Date: Tue, 19 Nov 2024 16:25:55 -0500
Subject: [PATCH 08/26] Update CHANGELOG.md

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 73d52aa..723180d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,3 +22,5 @@ Changed README format to standard DAAD README, added useage arguments.
 ### `Dependencies`
 
 ### `Deprecated`
+
+[0.1.3]: https://github.com/phac-nml/genomic_address_service/releases/tag/0.1.3

From 12630410d4ecea7d68138fb658e857330e6d0be8 Mon Sep 17 00:00:00 2001
From: kylacochrane <Kyla.Cochrane@phac-aspc.gc.ca>
Date: Tue, 19 Nov 2024 17:00:24 -0500
Subject: [PATCH 09/26] Updated CHANGELOG.md

---
 CHANGELOG.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 723180d..dd8ebd2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Fixed`
 
-- Converted `data[sample_id]` to a string in the `format_df` function with `assign.py` to prevent `AttributeErrors` when non-string values are in the genomic address.
+- Converted `data[sample_id]` to a string in the `format_df` function with `assign.py` to prevent `AttributeErrors` when non-string values are in the genomic address. [PR14](https://github.com/phac-nml/genomic_address_service/pull/14)
+- Updated `buildNewick` formula to use cophenetic distances for branch lengths, aligning cluster visualization with BioNumerics dendrogram representation. [PR15](https://github.com/phac-nml/genomic_address_service/pull/15)
 
 ## v1.0dev - [date]
 

From 3c7bd350c474169efa75293262716d4d45dd2059 Mon Sep 17 00:00:00 2001
From: kylacochrane <Kyla.Cochrane@phac-aspc.gc.ca>
Date: Fri, 22 Nov 2024 13:04:01 -0500
Subject: [PATCH 10/26] Update test_GAS_assign.py to align with recent updates

---
 tests/test_GAS_assign.py | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/tests/test_GAS_assign.py b/tests/test_GAS_assign.py
index 0d5718c..3e3fc27 100644
--- a/tests/test_GAS_assign.py
+++ b/tests/test_GAS_assign.py
@@ -7,10 +7,11 @@
 @pytest.fixture
 def mock_dist_file():
     content = """query_id\tref_id\tdist
+q1\tq1\t0.0
 q1\tr1\t0.1
-q2\tr2\t0.2
+q1\tr2\t0.2
 """
-    with NamedTemporaryFile('w+', delete=False) as tmp:
+    with NamedTemporaryFile('w+', suffix='.tsv', delete=False) as tmp:
         tmp.write(content)
         tmp.flush()
         yield tmp.name
@@ -18,27 +19,25 @@ def mock_dist_file():
 
 @pytest.fixture
 def mock_membership_file():
-    content = """id\tthreshold_0.1\tthreshold_0.2
-r1\t1\t1
-r2\t2\t1
+    content = """id\taddress_levels_notsplit
+r1\t1.1
+r2\t2.1
 """
-    with NamedTemporaryFile('w+', delete=False) as tmp:
+    with NamedTemporaryFile('w+', suffix='.tsv', delete=False) as tmp:
         tmp.write(content)
         tmp.flush()
         yield tmp.name
         os.unlink(tmp.name)
 
 def test_initialization(mock_dist_file, mock_membership_file):
-    threshold_map = {"threshold_0.1": 0.1, "threshold_0.2": 0.2}
-    a = assign(dist_file=mock_dist_file, membership_file=mock_membership_file, threshold_map=threshold_map, linkage_method='single')
-    
+    threshold_map = {"level_0": 0.1, "level_1": 0.2}
+    a = assign(dist_file=mock_dist_file, membership_file=mock_membership_file, threshold_map=threshold_map, linkage_method='single', sample_col='id', address_col='address_levels_notsplit', batch_size=100)
     assert a.status, "Initialization failed, check error_msgs for details"
     assert not a.error_msgs, f"Unexpected errors during initialization: {a.error_msgs}"
-    assert isinstance(a.query_df, pd.DataFrame), "Query DataFrame not initialized properly"
     assert isinstance(a.memberships_df, pd.DataFrame), "Memberships DataFrame"
 
 def test_check_membership_columns(mock_dist_file, mock_membership_file):
-    threshold_map = {"threshold_0.1": 0.1, "threshold_0.2": 0.2}
-    a = assign(dist_file=mock_dist_file, membership_file=mock_membership_file, threshold_map=threshold_map, linkage_method='single')
-    cols = ['threshold_0.1', 'threshold_0.2']
+    threshold_map = {"level_0": 0.1, "level_1": 0.2}
+    a = assign(dist_file=mock_dist_file, membership_file=mock_membership_file, threshold_map=threshold_map, linkage_method='single', sample_col='id', address_col='address_levels_notsplit', batch_size=100)
+    cols = ['level_0', 'level_1']
     assert a.check_membership_columns(cols), "Membership column check failed for valid columns"
\ No newline at end of file

From 180451bc3fc25bc480e2bd3066a19d6c50b27ae8 Mon Sep 17 00:00:00 2001
From: kylacochrane <Kyla.Cochrane@phac-aspc.gc.ca>
Date: Fri, 22 Nov 2024 14:23:59 -0500
Subject: [PATCH 11/26] Update test_GAS_utils.py to align with recent updates

---
 tests/test_GAS_utils.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/tests/test_GAS_utils.py b/tests/test_GAS_utils.py
index 2264e73..1f45cf3 100644
--- a/tests/test_GAS_utils.py
+++ b/tests/test_GAS_utils.py
@@ -74,12 +74,8 @@ def test_write_cluster_assignments(outfmt):
     os.unlink(tmpfile.name)
 
 def test_init_threshold_map():
-    with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.txt') as tmpfile:
-        tmpfile.write("id\tsample_id\tST\tnomenclature\taddress\tlevel_1\tlevel_2\tlevel_3\n")
-        tmpfile.write("1\tSample1\tST1\tNom1\tAddr1\t0.1\t0.2\t0.3\n")
-        tmpfile.seek(0)
-        thresholds = [0.1, 0.2, 0.3]
-        result = init_threshold_map(tmpfile.name, thresholds)
-        expected_result = {'level_1': 0.1, 'level_2': 0.2, 'level_3': 0.3}
+    thresholds = [0.1, 0.2, 0.3]
+    result = init_threshold_map(thresholds)
+    expected_result = {0: 0.1, 1: 0.2, 2: 0.3}
     assert result == expected_result
-    os.unlink(tmpfile.name)
+    
\ No newline at end of file

From 146a3d83209feb77788882fcd0ce4c3c77dd99f7 Mon Sep 17 00:00:00 2001
From: kylacochrane <Kyla.Cochrane@phac-aspc.gc.ca>
Date: Fri, 22 Nov 2024 15:12:06 -0500
Subject: [PATCH 12/26] Update test_GAS_matrix_splitter.py to align with recent
 updates

---
 tests/test_GAS_matrix_splitter.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/test_GAS_matrix_splitter.py b/tests/test_GAS_matrix_splitter.py
index c276e0d..62a4c28 100644
--- a/tests/test_GAS_matrix_splitter.py
+++ b/tests/test_GAS_matrix_splitter.py
@@ -41,15 +41,16 @@ def test_get_file_length(mock_matrix_file):
     ms = matrix_splitter(file_path=mock_matrix_file, 
                          out_path=dummy_out_path, 
                          batch_size=dummy_batch_size)
-    expected_line_count = 10
+    expected_line_count = 3
     assert ms.get_file_length() == expected_line_count, "get_file_length did not return the expected number of lines"
 
 def test_prep_batch_ranges(big_mock_matrix_file, output_directory):
     batch_size = 1
     ms = matrix_splitter(big_mock_matrix_file, output_directory, batch_size)
     ms.prep_batch_ranges()
-    assert ms.num_batches == 2 
+    assert ms.num_batches == 5 
     assert len(ms.ranges) == ms.num_batches
+    assert ms.ranges == [(i, i+1) for i in range(0, ms.num_batches)]
 
 @pytest.mark.parametrize("method_name", ["parse_distance_matrix_bins", "parse_distance_matrix_partitions"])
 def test_parse_methods(big_mock_matrix_file, output_directory, method_name):

From 41bdffe9aead60f8f52676a9f6cfbdc43088b48a Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Tue, 3 Dec 2024 10:16:43 -0500
Subject: [PATCH 13/26] Fix TypeError

---
 genomic_address_service/classes/matrix_splitter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genomic_address_service/classes/matrix_splitter.py b/genomic_address_service/classes/matrix_splitter.py
index 406ce27..987354e 100644
--- a/genomic_address_service/classes/matrix_splitter.py
+++ b/genomic_address_service/classes/matrix_splitter.py
@@ -51,7 +51,7 @@ def prep_batch_ranges(self):
         rem = self.batch_size % self.num_batches
         ranges = []
         for i in range(0,self.num_batches):
-            ranges.append(i*self.batch_size,i*self.batch_size+self.batch_size)
+            ranges.append((i*self.batch_size,i*self.batch_size+self.batch_size))
         if rem != 0:
             r = ranges[-1]
             r[1] = self.num_lines

From 643b3b6c280d2e96c2ebde4cfc956f7fc01f22b7 Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Wed, 4 Dec 2024 13:41:20 -0500
Subject: [PATCH 14/26] Removed unused class and subsequent pytests

---
 .../classes/matrix_splitter.py                | 113 ------------------
 tests/test_GAS_matrix_splitter.py             |  63 ----------
 2 files changed, 176 deletions(-)
 delete mode 100644 genomic_address_service/classes/matrix_splitter.py
 delete mode 100644 tests/test_GAS_matrix_splitter.py

diff --git a/genomic_address_service/classes/matrix_splitter.py b/genomic_address_service/classes/matrix_splitter.py
deleted file mode 100644
index 987354e..0000000
--- a/genomic_address_service/classes/matrix_splitter.py
+++ /dev/null
@@ -1,113 +0,0 @@
-import os
-
-class matrix_splitter:
-    file_path = None
-    out_path = None
-    batch_size = None
-    prefix = 'segment'
-    num_lines = 0
-    delim = "\t"
-    is_ok = True
-    partitions = []
-    error_msg = []
-
-    def __init__(self,file_path,out_path,batch_size,partitions=[],prefix=None,delim='\t'):
-        self.file_path = file_path
-        self.out_path = out_path
-        self.batch_size = batch_size
-        self.prefix = prefix
-        self.delim = delim
-        self.num_batches = 0
-        self.ranges = []
-        if len(partitions) > 0:
-            self.partitions = partitions
-
-        if not os.path.isfile(file_path):
-            self.error_msg.append("Error matrix file: {} does not exist".format(self.file_path))
-            self.is_ok = False
-            return
-
-        self.num_lines = self.get_file_length() - 1
-
-        if self.num_lines < 2:
-            self.error_msg.append("Error matrix file: {} does not contain at least two samples".format(self.file_path))
-            self.is_ok = False
-            return
-
-        if not os.path.isdir(self.out_path):
-            self.error_msg.append("Directory: {} does not exist".format(self.out_path))
-            self.is_ok = False
-            return
-
-        if self.batch_size  < 5:
-            self.batch_size = 5
-
-
-    def get_file_length(self):
-        return int(os.popen(f'wc -l {self.file_path}').read().split()[0])
-
-    def prep_batch_ranges(self):
-        self.num_batches = int(self.num_lines / self.batch_size)
-        rem = self.batch_size % self.num_batches
-        ranges = []
-        for i in range(0,self.num_batches):
-            ranges.append((i*self.batch_size,i*self.batch_size+self.batch_size))
-        if rem != 0:
-            r = ranges[-1]
-            r[1] = self.num_lines
-        self.ranges = ranges
-
-
-    def parse_distance_matrix_bins(self):
-        '''
-        Reads in a lower triangle/full distance matrix and splits it into component matricies
-        according to the desired number of samples in each batch. Matrix is returned in lower triangle format
-        :return:
-        '''
-        with open(self.file_path, 'r') as f:
-            header = next(f).split(self.delim)  # skip header
-            line_num = 0
-            range_index = 0
-            start, end = self.ranges[range_index]
-            out_fh = open(os.path.join(self.out_path,"{}-{}.matrix".format(self.prefix,range_index)),'w')
-            out_fh.write("{}\n".format("{}".format(self.delim).join([str(x) for x in header[start,end]])))
-            for line in f:
-                line_split = line.strip().split(self.delim)
-                label = line_split[0]
-                distances = list(map(float, line_split[start:end]))
-                out_fh.write("{}\t{}\n".format(label,"{}".format(self.delim).join([str(x) for x in distances])))
-                line_num += 1
-                if line_num > end:
-                    range_index+=1
-                    start, end = self.ranges[range_index]
-                    out_fh.close()
-                    out_fh = open(os.path.join(self.out_path, "{}-{}.matrix".format(self.prefix, range_index)), 'w')
-                    out_fh.write("{}\n".format("{}".format(self.delim).join([str(x) for x in header[start, end]])))
-        out_fh.close()
-
-    def parse_distance_matrix_partitions(self):
-        '''
-        Reads in a lower triangle/full distance matrix and splits it into component matricies
-        according to the desired number of samples in each batch. Matrix is returned in lower triangle format
-        :return:
-        '''
-        with open(self.file_path, 'r') as f:
-            header = next(f).split(self.delim)  # skip header
-            line_num = 0
-            range_index = 0
-            start, end = self.ranges[range_index]
-            out_fh = open(os.path.join(self.out_path,"{}-{}.matrix".format(self.prefix,range_index)),'w')
-            out_fh.write("{}\n".format("{}".format(self.delim).join([str(x) for x in header[start,end]])))
-            for line in f:
-                line_split = line.strip().split(self.delim)
-                label = line_split[0]
-                distances = list(map(float, line_split[start:end]))
-                out_fh.write("{}\t{}\n".format(label,"{}".format(self.delim).join([str(x) for x in distances])))
-                line_num += 1
-                if line_num > end:
-                    range_index+=1
-                    start, end = self.ranges[range_index]
-                    out_fh.close()
-                    out_fh = open(os.path.join(self.out_path, "{}-{}.matrix".format(self.prefix, range_index)), 'w')
-                    out_fh.write("{}\n".format("{}".format(self.delim).join([str(x) for x in header[start, end]])))
-        out_fh.close()
\ No newline at end of file
diff --git a/tests/test_GAS_matrix_splitter.py b/tests/test_GAS_matrix_splitter.py
deleted file mode 100644
index 62a4c28..0000000
--- a/tests/test_GAS_matrix_splitter.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import pytest
-import os
-from tempfile import NamedTemporaryFile, TemporaryDirectory
-from genomic_address_service.classes.matrix_splitter import matrix_splitter
-
-@pytest.fixture
-def mock_matrix_file():
-    content = "Header\nRow1\t0.1\t0.2\nRow2\t0.2\t0.1\n"
-    with NamedTemporaryFile('w+', delete=False) as tmp:
-        tmp.write(content)
-        tmp.flush()
-    yield tmp.name
-    os.unlink(tmp.name)
-
-@pytest.fixture
-def big_mock_matrix_file():
-    content = "Header\nRow1\t0.1\t0.2\t0.3\t0.4\t0.5\nRow2\t0.6\t0.7\t0.8\t0.9\t1.0\nRow3\t1.1\t1.2\t1.3\t1.4\t1.5\nRow4\t1.6\t1.7\t1.8\t1.9\t2.0\nRow5\t2.1\t2.2\t2.3\t2.4\t2.5\n"
-    with NamedTemporaryFile('w+', delete=False) as tmp:
-        tmp.write(content)
-        tmp.flush()
-    yield tmp.name
-    os.unlink(tmp.name)
-
-@pytest.fixture
-def output_directory():
-    with TemporaryDirectory() as tmpdir:
-        yield tmpdir
-
-
-def test_initialization(mock_matrix_file, output_directory):
-    batch_size = 7
-    ms = matrix_splitter(mock_matrix_file, output_directory, batch_size)
-    assert ms.file_path == mock_matrix_file
-    assert ms.out_path == output_directory
-    assert ms.batch_size == batch_size
-    assert ms.is_ok == True  # Assuming the mock file and output directory meet the requirements
-
-def test_get_file_length(mock_matrix_file):
-    dummy_out_path = "dummy_out_path"
-    dummy_batch_size = 1
-    ms = matrix_splitter(file_path=mock_matrix_file, 
-                         out_path=dummy_out_path, 
-                         batch_size=dummy_batch_size)
-    expected_line_count = 3
-    assert ms.get_file_length() == expected_line_count, "get_file_length did not return the expected number of lines"
-
-def test_prep_batch_ranges(big_mock_matrix_file, output_directory):
-    batch_size = 1
-    ms = matrix_splitter(big_mock_matrix_file, output_directory, batch_size)
-    ms.prep_batch_ranges()
-    assert ms.num_batches == 5 
-    assert len(ms.ranges) == ms.num_batches
-    assert ms.ranges == [(i, i+1) for i in range(0, ms.num_batches)]
-
-@pytest.mark.parametrize("method_name", ["parse_distance_matrix_bins", "parse_distance_matrix_partitions"])
-def test_parse_methods(big_mock_matrix_file, output_directory, method_name):
-    batch_size = 1
-    ms = matrix_splitter(big_mock_matrix_file, output_directory, batch_size)
-    ms.prep_batch_ranges()
-    parse_method = getattr(ms, method_name)
-    parse_method()
-    for i in range(ms.num_batches):
-        assert os.path.exists(os.path.join(ms.out_path, f"{ms.prefix}-{i}.matrix"))

From 6c0b3525ac311e12c729b3b3c19aac165d597a3c Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Wed, 18 Dec 2024 09:23:32 -0500
Subject: [PATCH 15/26] Updated python packages

---
 setup.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 7d1f5e2..24215fe 100644
--- a/setup.py
+++ b/setup.py
@@ -30,7 +30,7 @@ def read(fname):
     name='genomic_address_service',
     include_package_data=True,
     version=__version__,
-    python_requires='>=3.8.2,<4',
+    python_requires='>=3.10.0,<4',
     setup_requires=['pytest-runner'],
     tests_require=['pytest'],
     packages=find_packages(exclude=['tests']),
@@ -49,12 +49,15 @@ def read(fname):
 
     install_requires=[
         'pyarrow==12.0.0',
-        'fastparquet==2023.4.0',
         'numba==0.57.1',
         'numpy==1.24.4',
         'tables==3.8.0',
         'six>=1.16.0',
         'pandas==2.0.2 ',
+        'pytest==8.3.3',
+        'scipy==1.14.1',
+        'psutil==6.1.0',
+        'fastparquet==2023.4.0' #Will drop support of fastparquet in future versions
 
     ],
 

From 2f373c3ebf4ae1c79f7bbc1aa7ee00fa26fcf8ee Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Wed, 18 Dec 2024 10:01:29 -0500
Subject: [PATCH 16/26] Add CI and branch protection to Github actions

---
 .github/workflows/branch.yml | 44 ++++++++++++++++++++++++++++++++++++
 .github/workflows/ci.yml     | 40 ++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+)
 create mode 100644 .github/workflows/branch.yml
 create mode 100644 .github/workflows/ci.yml

diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml
new file mode 100644
index 0000000..0536767
--- /dev/null
+++ b/.github/workflows/branch.yml
@@ -0,0 +1,44 @@
+name: GAS branch protection
+# This workflow is triggered on PRs to main branch on the repository
+# It fails when someone tries to make a PR against the phac-nml `main` branch instead of `dev`
+on:
+  pull_request_target:
+    branches: [main]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      # PRs to the phac-nml repo main branch are only ok if coming from the phac-nml repo `dev` or any `patch` branches
+      - name: Check PRs
+        if: github.repository == 'phac-nml/genomic_address_service'
+        run: |
+          { [[ ${{github.event.pull_request.head.repo.full_name }} == phac-nml/genomic_address_service ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]]
+
+      # If the above check failed, post a comment on the PR explaining the failure
+      # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets
+      - name: Post PR comment
+        if: failure()
+        uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2
+        with:
+          message: |
+            ## This PR is against the `main` branch :x:
+
+            * Do not close this PR
+            * Click _Edit_ and change the `base` to `dev`
+            * This CI test will remain failed until you push a new commit
+
+            ---
+
+            Hi @${{ github.event.pull_request.user.login }},
+
+            It looks like this pull-request has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `main` branch.
+            The `main` branch on phac-nml repositories should always contain code from the latest release.
+            Because of this, PRs to `main` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch.
+
+            You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page.
+            Note that even after this, the test will continue to show as failing until you push a new commit.
+
+            Thanks again for your contribution!
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
+          allow-repeats: false
\ No newline at end of file
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..6535935
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,40 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: Python package
+
+on:
+  push:
+    branches: [ $default-branch ]
+  pull_request:
+    branches: [ $default-branch ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.10", "3.11"]
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v3
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        python -m pip install flake8 pytest
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    - name: Test with pytest
+      run: |
+        pytest

From 8b6eac8e04f7200d5ae65927e1d6730cf84e18e6 Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Wed, 18 Dec 2024 10:04:55 -0500
Subject: [PATCH 17/26] Update changelog

---
 CHANGELOG.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 723180d..71d8a89 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,11 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## Development
+
+- Fixed pytests
+- Added github actions for pytest and branch protection
+
 ## [0.1.3] - 2024/11/..
 
 ### `Fixed`

From 7fcd288477f378276633f5fe85525f38b1c5bfa7 Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Wed, 18 Dec 2024 10:14:54 -0500
Subject: [PATCH 18/26] Modified ci instructions

---
 .github/workflows/ci.yml | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 6535935..2d29e39 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -5,13 +5,18 @@ name: Python package
 
 on:
   push:
-    branches: [ $default-branch ]
+    branches:
+      - dev
   pull_request:
-    branches: [ $default-branch ]
+  release:
+    types: [published]
 
-jobs:
-  build:
 
+jobs:
+  test:
+    name: Run pytest
+    # Only run on push if this is the phac-nml dev branch (merged PRs)
+    if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'phac-nml/genomic_address_service') }}"
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false

From 266485a2628a9f0a5a0f7b5476aa8cfb666faee7 Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Wed, 18 Dec 2024 10:18:33 -0500
Subject: [PATCH 19/26] Modified ci instructions

---
 .github/workflows/ci.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2d29e39..72023d8 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -33,6 +33,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         python -m pip install flake8 pytest
+        pip install .
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
     - name: Lint with flake8
       run: |

From 2ada99b5038bac270a6db6762c097cdec5591bb7 Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Wed, 18 Dec 2024 10:40:26 -0500
Subject: [PATCH 20/26] Inline comments

---
 .github/workflows/ci.yml |  2 +-
 .gitignore               |  4 +---
 CHANGELOG.md             | 10 +++++-----
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 72023d8..aece723 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -21,7 +21,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.10", "3.11"]
+        python-version: ["3.10", "3.13"]
 
     steps:
     - uses: actions/checkout@v4
diff --git a/.gitignore b/.gitignore
index a28eca9..496ee2c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1 @@
-__pycache__
-test_GAS_assign.py
-test_GAS_matrix_splitter.py
\ No newline at end of file
+.DS_Store
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 71d8a89..67f9a52 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,17 +3,17 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## Development
-
-- Fixed pytests
-- Added github actions for pytest and branch protection
-
 ## [0.1.3] - 2024/11/..
 
 ### `Fixed`
 
 - Converted `data[sample_id]` to a string in the `format_df` function with `assign.py` to prevent `AttributeErrors` when non-string values are in the genomic address.
 
+### `Added`
+
+- Fixed pytests
+- Added github actions for pytest and branch protection
+
 ## v1.0dev - [date]
 
 Initial release of phac-nml/genomic_address_service

From 2f608f2d174f405b513c70b1e57d2bdb7e58e400 Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Wed, 18 Dec 2024 11:15:08 -0500
Subject: [PATCH 21/26] numba cannot handle python 3.13

---
 .github/workflows/ci.yml | 2 +-
 setup.py                 | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index aece723..bb4a74a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -21,7 +21,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.10", "3.13"]
+        python-version: ["3.10", "3.12"]
 
     steps:
     - uses: actions/checkout@v4
diff --git a/setup.py b/setup.py
index 24215fe..568e227 100644
--- a/setup.py
+++ b/setup.py
@@ -30,7 +30,7 @@ def read(fname):
     name='genomic_address_service',
     include_package_data=True,
     version=__version__,
-    python_requires='>=3.10.0,<4',
+    python_requires='>=3.10.0,<3.13.0',
     setup_requires=['pytest-runner'],
     tests_require=['pytest'],
     packages=find_packages(exclude=['tests']),
@@ -49,7 +49,7 @@ def read(fname):
 
     install_requires=[
         'pyarrow==12.0.0',
-        'numba==0.57.1',
+        'numba==0.59.1',
         'numpy==1.24.4',
         'tables==3.8.0',
         'six>=1.16.0',

From 7f99793006f58e66e3cfa31f3b431dd69d29ffb4 Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Wed, 18 Dec 2024 11:48:48 -0500
Subject: [PATCH 22/26] Dependencies versions

---
 .github/workflows/ci.yml | 2 +-
 setup.py                 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index bb4a74a..34ef399 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -21,7 +21,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.10", "3.12"]
+        python-version: ["3.10","3.11","3.12"]
 
     steps:
     - uses: actions/checkout@v4
diff --git a/setup.py b/setup.py
index 568e227..0fcabff 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def read(fname):
     install_requires=[
         'pyarrow==12.0.0',
         'numba==0.59.1',
-        'numpy==1.24.4',
+        'numpy==1.26.4',
         'tables==3.8.0',
         'six>=1.16.0',
         'pandas==2.0.2 ',

From 0405af2777f03b68f15277dc9dec25762e9575c2 Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Wed, 18 Dec 2024 11:52:24 -0500
Subject: [PATCH 23/26] Updated tables package version

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 0fcabff..ee2b502 100644
--- a/setup.py
+++ b/setup.py
@@ -51,7 +51,7 @@ def read(fname):
         'pyarrow==12.0.0',
         'numba==0.59.1',
         'numpy==1.26.4',
-        'tables==3.8.0',
+        'tables==3.9.1',
         'six>=1.16.0',
         'pandas==2.0.2 ',
         'pytest==8.3.3',

From e62a06ea7e560624ba438ba201f6381e2156242f Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Wed, 18 Dec 2024 12:06:56 -0500
Subject: [PATCH 24/26] Updated pyarrow package version

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index ee2b502..59e968d 100644
--- a/setup.py
+++ b/setup.py
@@ -48,7 +48,7 @@ def read(fname):
     },
 
     install_requires=[
-        'pyarrow==12.0.0',
+        'pyarrow>=14.0.0',
         'numba==0.59.1',
         'numpy==1.26.4',
         'tables==3.9.1',

From 3b60462dbd093eb16fbab092c7d3f037cdb1dbc8 Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Wed, 18 Dec 2024 12:53:06 -0500
Subject: [PATCH 25/26] Removed tests in python 3.11

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 34ef399..1ab2546 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -21,7 +21,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.10","3.11","3.12"]
+        python-version: ["3.10","3.12"]
 
     steps:
     - uses: actions/checkout@v4

From 565bf9c73b56f26bd2e5be3981902937100f1544 Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Fri, 20 Dec 2024 11:27:04 -0500
Subject: [PATCH 26/26] Patch release version

---
 CHANGELOG.md                       | 6 +++---
 genomic_address_service/version.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6900c16..b4c112a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [0.1.3] - 2024/11/..
+## [0.1.3] - 2024-12-20
 
 ### `Fixed`
 
@@ -12,8 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Added`
 
-- Fixed pytests
-- Added github actions for pytest and branch protection
+- Fixed pytests [PR7](https://github.com/phac-nml/genomic_address_service/pull/7)
+- Added github actions for pytest and branch protection [PR7](https://github.com/phac-nml/genomic_address_service/pull/7)
 
 ## v1.0dev - [date]
 
diff --git a/genomic_address_service/version.py b/genomic_address_service/version.py
index 34da6b8..ef0b380 100644
--- a/genomic_address_service/version.py
+++ b/genomic_address_service/version.py
@@ -1 +1 @@
-__version__ = '0.1.2'
\ No newline at end of file
+__version__ = '0.1.3'
\ No newline at end of file