Merge pull request #132 from maxibor/review_comments

Code and documentation changes to address review comments
SPAAM-community · Dec 15, 2023 · ce9948f · ce9948f
2 parents b4174fc + 9191e30
commit ce9948f
Show file tree

Hide file tree

Showing 2 changed files with 73 additions and 23 deletions.
diff --git a/AMDirT/core/__init__.py b/AMDirT/core/__init__.py
@@ -15,10 +15,11 @@
 logging.basicConfig(level=logging.INFO)
 
 handler = colorlog.StreamHandler()
-handler.setFormatter(colorlog.ColoredFormatter(
-	'%(log_color)s%(name)s [%(levelname)s]: %(message)s'))
+handler.setFormatter(
+    colorlog.ColoredFormatter("%(log_color)s%(name)s [%(levelname)s]: %(message)s")
+)
 
-logger = colorlog.getLogger('AMDirT')
+logger = colorlog.getLogger("AMDirT")
 logger.addHandler(handler)
 logger.propagate = False
 
@@ -29,15 +30,21 @@ def monkeypatch_get_storage_manager():
     else:
         # When running in "raw mode", we can't access the CacheStorageManager,
         # so we're falling back to InMemoryCache.
+        # https://github.com/streamlit/streamlit/issues/6620
         # _LOGGER.warning("No runtime found, using MemoryCacheStorageManager")
-        return st.runtime.caching.storage.dummy_cache_storage.MemoryCacheStorageManager()
+        return (
+            st.runtime.caching.storage.dummy_cache_storage.MemoryCacheStorageManager()
+        )
+
 
 st.runtime.caching._data_caches.get_storage_manager = monkeypatch_get_storage_manager
 
+
 def get_json_path():
     path = get_module_dir("AMDirT.assets").joinpath("tables.json")
     return path
 
+
 @st.cache_data
 def get_amdir_tags():
     r = requests.get(
@@ -87,8 +94,14 @@ def doi2bib(doi: str) -> str:
 
     return r.text
 
+
 @st.cache_data
-def get_libraries(table_name: str, samples: pd.DataFrame, libraries: pd.DataFrame, supported_archives: Iterable[str]):
+def get_libraries(
+    table_name: str,
+    samples: pd.DataFrame,
+    libraries: pd.DataFrame,
+    supported_archives: Iterable[str],
+):
     """Get filtered libraries from samples and libraries tables
 
     Args:
@@ -127,6 +140,7 @@ def get_libraries(table_name: str, samples: pd.DataFrame, libraries: pd.DataFram
 
     return selected_libraries
 
+
 def get_filename(path_string: str, orientation: str) -> Tuple[str, str]:
     """
     Get Fastq Filename from download_links column
@@ -149,8 +163,8 @@ def get_filename(path_string: str, orientation: str) -> Tuple[str, str]:
     elif orientation == "rev":
         return rev
 
-def parse_to_mag(selected_libraries):
 
+def parse_to_mag(selected_libraries):
     selected_libraries["short_reads_1"] = selected_libraries["download_links"].apply(
         get_filename, orientation="fwd"
     )
@@ -176,6 +190,7 @@ def parse_to_mag(selected_libraries):
     )
     return selected_libraries
 
+
 @st.cache_data
 def prepare_eager_table(
     samples: pd.DataFrame,
@@ -270,10 +285,14 @@ def prepare_mag_table(
     )
 
     # Create a DataFrame for "SINGLE" values
-    single_libraries = selected_libraries[selected_libraries["library_layout"] == "SINGLE"]
+    single_libraries = selected_libraries[
+        selected_libraries["library_layout"] == "SINGLE"
+    ]
 
     # Create a DataFrame for "PAIRED" values
-    paired_libraries = selected_libraries[selected_libraries["library_layout"] == "PAIRED"]
+    paired_libraries = selected_libraries[
+        selected_libraries["library_layout"] == "PAIRED"
+    ]
 
     if not single_libraries.empty:
         single_libraries = parse_to_mag(single_libraries)
@@ -282,6 +301,7 @@ def prepare_mag_table(
 
     return single_libraries, paired_libraries
 
+
 @st.cache_data
 def prepare_accession_table(
     samples: pd.DataFrame,
@@ -335,6 +355,7 @@ def prepare_accession_table(
         "aspera_script": dl_script_header + aspera_script,
     }
 
+
 @st.cache_data
 def prepare_taxprofiler_table(
     samples: pd.DataFrame,
@@ -365,26 +386,53 @@ def prepare_taxprofiler_table(
         get_filename, orientation="rev"
     )
 
-    selected_libraries["fastq_2"] = selected_libraries["fastq_2"].replace(
-        "NA", ""
-    )
+    selected_libraries["fastq_2"] = selected_libraries["fastq_2"].replace("NA", "")
 
     selected_libraries["fasta"] = ""
 
-    selected_libraries['instrument_model'] = where(selected_libraries['instrument_model'].str.lower().str.contains('illumina|nextseq|hiseq|miseq'), 'ILLUMINA',
-        where(selected_libraries['instrument_model'].str.lower().str.contains('torrent'), 'ION_TORRENT',
-        where(selected_libraries['instrument_model'].str.lower().str.contains('helicos'), 'HELICOS',
-        where(selected_libraries['instrument_model'].str.lower().str.contains('bgiseq'), 'BGISEQ',
-        where(selected_libraries['instrument_model'].str.lower().str.contains('454'), 'LS454',
-        selected_libraries['instrument_model']))))
+    selected_libraries["instrument_model"] = where(
+        selected_libraries["instrument_model"]
+        .str.lower()
+        .str.contains("illumina|nextseq|hiseq|miseq"),
+        "ILLUMINA",
+        where(
+            selected_libraries["instrument_model"].str.lower().str.contains("torrent"),
+            "ION_TORRENT",
+            where(
+                selected_libraries["instrument_model"]
+                .str.lower()
+                .str.contains("helicos"),
+                "HELICOS",
+                where(
+                    selected_libraries["instrument_model"]
+                    .str.lower()
+                    .str.contains("bgiseq"),
+                    "BGISEQ",
+                    where(
+                        selected_libraries["instrument_model"]
+                        .str.lower()
+                        .str.contains("454"),
+                        "LS454",
+                        selected_libraries["instrument_model"],
+                    ),
+                ),
+            ),
+        ),
     )
 
-    col2keep = ["sample_name", "library_name", "instrument_model", "fastq_1", "fastq_2", "fasta"]
+    col2keep = [
+        "sample_name",
+        "library_name",
+        "instrument_model",
+        "fastq_1",
+        "fastq_2",
+        "fasta",
+    ]
     selected_libraries = selected_libraries[col2keep].rename(
         columns={
             "sample_name": "sample",
             "library_name": "run_accession",
-            "instrument_model": "instrument_platform"
+            "instrument_model": "instrument_platform",
         }
     )
 

diff --git a/README.md b/README.md
@@ -20,19 +20,21 @@ For documentation on using the tool, please see [How Tos](https://amdirt.readthe
 
 ## Install
 
-### 1. With pip
+### 1. With [pip](https://pip.pypa.io/en/stable/getting-started/)
 
 ```bash
 pip install amdirt
 ```
 
 ### 2. With conda
 
-Installing AMDirT in a dedicated conda environment
+Installing AMDirT in a dedicated [conda](https://docs.conda.io/projects/miniconda/en/latest/index.html) environment
 
 ```bash
-conda create -n amdirt -c bioconda amdirt
-conda activate amdirt
+conda create -n amdirt -c bioconda amdirt # install amdirt in a dedicated conda environment
+conda activate amdirt # activate the conda environment
+# use amdirt
+conda deactivate # deactivate the conda environment
 ```
 
 ### The latest development version, directly from GitHub