Skip to content

Commit

Permalink
Merge pull request #132 from maxibor/review_comments
Browse files Browse the repository at this point in the history
Code and documentation changes to address review comments
  • Loading branch information
maxibor authored Dec 15, 2023
2 parents b4174fc + 9191e30 commit ce9948f
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 23 deletions.
86 changes: 67 additions & 19 deletions AMDirT/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
logging.basicConfig(level=logging.INFO)

handler = colorlog.StreamHandler()
handler.setFormatter(colorlog.ColoredFormatter(
'%(log_color)s%(name)s [%(levelname)s]: %(message)s'))
handler.setFormatter(
colorlog.ColoredFormatter("%(log_color)s%(name)s [%(levelname)s]: %(message)s")
)

logger = colorlog.getLogger('AMDirT')
logger = colorlog.getLogger("AMDirT")
logger.addHandler(handler)
logger.propagate = False

Expand All @@ -29,15 +30,21 @@ def monkeypatch_get_storage_manager():
else:
# When running in "raw mode", we can't access the CacheStorageManager,
# so we're falling back to InMemoryCache.
# https://github.com/streamlit/streamlit/issues/6620
# _LOGGER.warning("No runtime found, using MemoryCacheStorageManager")
return st.runtime.caching.storage.dummy_cache_storage.MemoryCacheStorageManager()
return (
st.runtime.caching.storage.dummy_cache_storage.MemoryCacheStorageManager()
)


st.runtime.caching._data_caches.get_storage_manager = monkeypatch_get_storage_manager


def get_json_path():
    """Return the path to ``tables.json`` inside the ``AMDirT.assets`` module directory.

    The directory is resolved with ``get_module_dir("AMDirT.assets")`` and the
    file name is appended with ``joinpath``.
    """
    return get_module_dir("AMDirT.assets").joinpath("tables.json")


@st.cache_data
def get_amdir_tags():
r = requests.get(
Expand Down Expand Up @@ -87,8 +94,14 @@ def doi2bib(doi: str) -> str:

return r.text


@st.cache_data
def get_libraries(table_name: str, samples: pd.DataFrame, libraries: pd.DataFrame, supported_archives: Iterable[str]):
def get_libraries(
table_name: str,
samples: pd.DataFrame,
libraries: pd.DataFrame,
supported_archives: Iterable[str],
):
"""Get filtered libraries from samples and libraries tables
Args:
Expand Down Expand Up @@ -127,6 +140,7 @@ def get_libraries(table_name: str, samples: pd.DataFrame, libraries: pd.DataFram

return selected_libraries


def get_filename(path_string: str, orientation: str) -> Tuple[str, str]:
"""
Get Fastq Filename from download_links column
Expand All @@ -149,8 +163,8 @@ def get_filename(path_string: str, orientation: str) -> Tuple[str, str]:
elif orientation == "rev":
return rev

def parse_to_mag(selected_libraries):

def parse_to_mag(selected_libraries):
selected_libraries["short_reads_1"] = selected_libraries["download_links"].apply(
get_filename, orientation="fwd"
)
Expand All @@ -176,6 +190,7 @@ def parse_to_mag(selected_libraries):
)
return selected_libraries


@st.cache_data
def prepare_eager_table(
samples: pd.DataFrame,
Expand Down Expand Up @@ -270,10 +285,14 @@ def prepare_mag_table(
)

# Create a DataFrame for "SINGLE" values
single_libraries = selected_libraries[selected_libraries["library_layout"] == "SINGLE"]
single_libraries = selected_libraries[
selected_libraries["library_layout"] == "SINGLE"
]

# Create a DataFrame for "PAIRED" values
paired_libraries = selected_libraries[selected_libraries["library_layout"] == "PAIRED"]
paired_libraries = selected_libraries[
selected_libraries["library_layout"] == "PAIRED"
]

if not single_libraries.empty:
single_libraries = parse_to_mag(single_libraries)
Expand All @@ -282,6 +301,7 @@ def prepare_mag_table(

return single_libraries, paired_libraries


@st.cache_data
def prepare_accession_table(
samples: pd.DataFrame,
Expand Down Expand Up @@ -335,6 +355,7 @@ def prepare_accession_table(
"aspera_script": dl_script_header + aspera_script,
}


@st.cache_data
def prepare_taxprofiler_table(
samples: pd.DataFrame,
Expand Down Expand Up @@ -365,26 +386,53 @@ def prepare_taxprofiler_table(
get_filename, orientation="rev"
)

selected_libraries["fastq_2"] = selected_libraries["fastq_2"].replace(
"NA", ""
)
selected_libraries["fastq_2"] = selected_libraries["fastq_2"].replace("NA", "")

selected_libraries["fasta"] = ""

selected_libraries['instrument_model'] = where(selected_libraries['instrument_model'].str.lower().str.contains('illumina|nextseq|hiseq|miseq'), 'ILLUMINA',
where(selected_libraries['instrument_model'].str.lower().str.contains('torrent'), 'ION_TORRENT',
where(selected_libraries['instrument_model'].str.lower().str.contains('helicos'), 'HELICOS',
where(selected_libraries['instrument_model'].str.lower().str.contains('bgiseq'), 'BGISEQ',
where(selected_libraries['instrument_model'].str.lower().str.contains('454'), 'LS454',
selected_libraries['instrument_model']))))
selected_libraries["instrument_model"] = where(
selected_libraries["instrument_model"]
.str.lower()
.str.contains("illumina|nextseq|hiseq|miseq"),
"ILLUMINA",
where(
selected_libraries["instrument_model"].str.lower().str.contains("torrent"),
"ION_TORRENT",
where(
selected_libraries["instrument_model"]
.str.lower()
.str.contains("helicos"),
"HELICOS",
where(
selected_libraries["instrument_model"]
.str.lower()
.str.contains("bgiseq"),
"BGISEQ",
where(
selected_libraries["instrument_model"]
.str.lower()
.str.contains("454"),
"LS454",
selected_libraries["instrument_model"],
),
),
),
),
)

col2keep = ["sample_name", "library_name", "instrument_model", "fastq_1", "fastq_2", "fasta"]
col2keep = [
"sample_name",
"library_name",
"instrument_model",
"fastq_1",
"fastq_2",
"fasta",
]
selected_libraries = selected_libraries[col2keep].rename(
columns={
"sample_name": "sample",
"library_name": "run_accession",
"instrument_model": "instrument_platform"
"instrument_model": "instrument_platform",
}
)

Expand Down
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,21 @@ For documentation on using the tool, please see [How Tos](https://amdirt.readthe

## Install

### 1. With pip
### 1. With [pip](https://pip.pypa.io/en/stable/getting-started/)

```bash
pip install amdirt
```

### 2. With conda

Installing AMDirT in a dedicated conda environment
Installing AMDirT in a dedicated [conda](https://docs.conda.io/projects/miniconda/en/latest/index.html) environment

```bash
conda create -n amdirt -c bioconda amdirt
conda activate amdirt
conda create -n amdirt -c bioconda amdirt # install amdirt in a dedicated conda environment
conda activate amdirt # activate the conda environment
# use amdirt
conda deactivate # deactivate the conda environment
```

### The latest development version, directly from GitHub
Expand Down

0 comments on commit ce9948f

Please sign in to comment.