Skip to content

Commit

Permalink
Merge pull request #164 from SPAAM-community/add-ena-table-output-amdirt-autofill
Browse files Browse the repository at this point in the history

Add ena table output amdirt autofill
  • Loading branch information
alexhbnr authored Feb 7, 2025
2 parents f5f8a4f + f49f186 commit 1d9a050
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 5 deletions.
33 changes: 28 additions & 5 deletions amdirt/autofill/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import sys
import pandas as pd

def run_autofill(accession, table_name=None, schema=None, dataset=None, sample_output=None, library_output=None, verbose=False):
def run_autofill(accession, table_name=None, schema=None, dataset=None, sample_output=None, library_output=None, verbose=False, output_ena_table=None):
"""Autofill the metadata of a table from ENA
Args:
Expand Down Expand Up @@ -56,22 +56,45 @@ def run_autofill(accession, table_name=None, schema=None, dataset=None, sample_o
query_dict = list()
for a in accession:
query_res = ena.query(a, fields=[
"experiment_accession",
"study_accession",
"run_accession",
"secondary_sample_accession",
"sample_alias",
"sample_accession",
"secondary_sample_accession",
"nominal_length",
"scientific_name",
"sample_title",
"bam_ftp",
"sra_ftp",
"fastq_ftp",
"fastq_md5",
"fastq_bytes",
"library_name",
"read_count",
"submitted_ftp",
"submitted_aspera",
"submitted_format",
"instrument_platform",
"instrument_model",
"scientific_name",
"tax_id",
"library_name",
"library_layout",
"library_strategy",
"read_count",
"library_selection",
"library_source",
])
query_dict += query_res
df_out = pd.DataFrame.from_dict(query_dict)


# Optionally write the ENA query results, before columns are renamed, to a tab-separated file at the user-supplied path
if output_ena_table is not None:
ena_table_output_path = output_ena_table
logger.info(f"ENA Table head (First 5 lines + headers)")
print(df_out.head(5))
df_out.to_csv(ena_table_output_path, sep='\t', index=False)
logger.info(f"Saved ENA table to {ena_table_output_path}")

df_out.rename(
columns={
"study_accession": "archive_project",
Expand Down
6 changes: 6 additions & 0 deletions amdirt/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,12 @@ def convert(ctx, no_args_is_help=True, **kwargs):
default="ancientmetagenome-hostassociated",
show_default=True,
)
@click.option(
"-t",
"--output_ena_table",
type=click.Path(writable=True),
help="path to ENA table output file",
)
@click.option(
"-l",
"--library_output",
Expand Down

0 comments on commit 1d9a050

Please sign in to comment.