Skip to content

Commit

Permalink
Merge pull request #338 from Ensembl/mbarba/fix_seq_compare
Browse files Browse the repository at this point in the history
Fix seq compare in integrity
  • Loading branch information
JAlvarezJarreta authored Apr 8, 2024
2 parents 76a4a25 + 14274d3 commit 6292e59
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion src/python/ensembl/io/genomio/manifest/check_integrity.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def _check_md5sum(self, file_path: Path, md5sum: str) -> None:
if readable_hash != md5sum:
raise InvalidIntegrityError(f"Invalid md5 checksum for {file_path}")

def prepare_integrity_data(self) -> None:
def prepare_integrity_data(self) -> None: # pylint: disable=too-many-branches
"""Read all the files and keep a record (IDs and their lengths)
for each case, to be compared later.
"""
Expand Down Expand Up @@ -187,6 +187,10 @@ def prepare_integrity_data(self) -> None:
seq_circular[seq["name"]] = seq.get("circular", False)
if seq["coord_system_level"] == "contig":
seqr_seqlevel[seq["name"]] = int(seq["length"])
# Also record synonyms (in case GFF file uses synonyms)
if "synonyms" in seq:
for synonym in seq["synonyms"]:
seq_lengths[synonym["name"]] = int(seq["length"])
self.lengths["seq_regions"] = seq_lengths
self.circular["seq_regions"] = seq_circular
self.seq_regions = seq_regions
Expand Down Expand Up @@ -731,6 +735,7 @@ def _compare_seqs(
seq_id not in comp["common"]
and seq_id not in comp["diff"]
and seq_id not in comp["diff_circular"]
and seq_id not in seqrs
):
comp["only_feat"].append(seq_id)

Expand Down

0 comments on commit 6292e59

Please sign in to comment.