Skip to content

Commit 6095416

Browse files
committed
fix some lint
1 parent f318603 commit 6095416

File tree

4 files changed

+63
-29
lines changed

4 files changed

+63
-29
lines changed

kb_python/config.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,10 @@ def get_provided_kallisto_path() -> Optional[str]:
3737
bin_name = 'kallisto'
3838
if '_KALLISTO_OPTOFF' in globals():
3939
if _KALLISTO_OPTOFF:
40-
bin_name=f'{bin_name}_optoff'
40+
bin_name = f'{bin_name}_optoff'
4141
if '_KALLISTO_KMER_64' in globals():
4242
if _KALLISTO_KMER_64:
43-
bin_name=f'{bin_name}_k64'
43+
bin_name = f'{bin_name}_k64'
4444
bin_filename = f'{bin_name}.exe' if PLATFORM == 'windows' else bin_name
4545
path = os.path.join(BINS_DIR, PLATFORM, CPU, 'kallisto', bin_filename)
4646
if not os.path.isfile(path):
@@ -60,12 +60,14 @@ def get_provided_bustools_path() -> Optional[str]:
6060
return None
6161
return path
6262

63+
6364
def set_special_kallisto_binary(k64: bool, optoff: bool):
6465
global _KALLISTO_KMER_64
6566
global _KALLISTO_OPTOFF
6667
_KALLISTO_KMER_64 = k64
6768
_KALLISTO_OPTOFF = optoff
6869

70+
6971
def get_compiled_kallisto_path(alias: str = COMPILED_DIR) -> Optional[str]:
7072
"""Finds platform-dependent kallisto binary compiled with `compile`.
7173

kb_python/extract.py

+38-18
Original file line numberDiff line numberDiff line change
@@ -82,13 +82,15 @@ def bustools_extract(
8282
run_executable(command)
8383
return {"bus": out_path}
8484

85+
8586
def is_gzipped(file_path):
8687
"""
8788
Checks if a file is gzipped by reading its magic number.
8889
"""
8990
with open(file_path, 'rb') as file:
9091
return file.read(2) == b'\x1f\x8b'
9192

93+
9294
def read_headers_from_fastq(fastq_file):
9395
"""
9496
Reads headers from a FASTQ file and returns a set of headers.
@@ -188,22 +190,22 @@ def remove_mm_from_bus(t2g_path, txnames, temp_dir, bus_in):
188190
ecs_mm, _ = get_mm_ecs(t2g_path, txnames, temp_dir)
189191

190192
if len(ecs_mm) > 0:
191-
## Remove mm ecs from bus file
193+
# Remove mm ecs from bus file
192194
bus_txt = os.path.join(temp_dir, "output.bus.txt")
193195
bus_txt_no_mm = os.path.join(temp_dir, "output_no_mm.bus.txt")
194196
bus_no_mm = os.path.join(temp_dir, "output_no_mm.bus")
195-
197+
196198
# Convert bus to txt file
197199
bustools_text(bus_path=bus_in, out_path=bus_txt, flags=True)
198-
200+
199201
# Remove mm ecs
200202
bus_df = pd.read_csv(bus_txt, sep="\t", header=None)
201203
new_bus_df = bus_df[~bus_df[2].isin(ecs_mm)]
202204
new_bus_df.to_csv(bus_txt_no_mm, sep="\t", index=False, header=None)
203-
205+
204206
# Convert back to bus format
205207
bustools_fromtext(txt_path=bus_txt_no_mm, out_path=bus_no_mm)
206-
208+
207209
logger.debug(
208210
f"BUS file without equivalence classes that map to multiple genes saved at {bus_no_mm}"
209211
)
@@ -223,7 +225,8 @@ def remove_mm_from_mc(t2g_path, txnames, temp_dir):
223225
ecmap_no_mm = os.path.join(temp_dir, "matrix_no_mm.ec")
224226

225227
logger.debug(
226-
f"Replacing transcript entries with -1 for equivalence classes that map to multiple genes from {os.path.join(temp_dir, 'matrix.ec')}"
228+
f"Replacing transcript entries with -1 for equivalence classes "
# Each literal in an implicit concatenation needs its own f-prefix to be interpolated.
f"that map to multiple genes from {os.path.join(temp_dir, 'matrix.ec')}"
227230
)
228231

229232
# Get multimapped equivalence classes
@@ -233,9 +236,10 @@ def remove_mm_from_mc(t2g_path, txnames, temp_dir):
233236
# Replace transcript entries for multimapped equivalence classes with -1
234237
ec_df.loc[ec_df[0].isin(ecs_mm), 1] = -1
235238
ec_df.to_csv(ecmap_no_mm, sep="\t", index=False, header=None)
236-
239+
237240
logger.debug(
238-
f"matrix.ec file where transcript entries were replaced with -1 for equivalence classes that map to multiple genes saved at {ecmap_no_mm}"
241+
f"matrix.ec file where transcript entries were replaced with -1 for "
# Each literal in an implicit concatenation needs its own f-prefix to be interpolated.
f"equivalence classes that map to multiple genes saved at {ecmap_no_mm}"
239243
)
240244

241245
return ecmap_no_mm
@@ -271,11 +275,19 @@ def extract(
271275
targets: Gene or transcript names for which to extract the raw reads that align to the index
272276
out_dir: Path to output directory
273277
target_type: 'gene' (default) or 'transcript' -> Defines whether targets are gene or transcript names
274-
extract_all: Extracts reads for all genes or transcripts (as defined in target_type), defaults to `False`. Might take a long time to run when the reference index contains a large number of genes. Set targets = None when using extract_all
275-
extract_all_fast: Extracts all pseudo-aligned reads, defaults to `False`. Does not break down output by gene/transcript. Set targets = None when using extract_all_fast
276-
extract_all_unmapped: Extracts all unmapped reads, defaults to `False`. Set targets = None when using extract_all_unmapped
278+
extract_all: Extracts reads for all genes or transcripts (as defined in target_type), defaults to `False`.
279+
Might take a long time to run when the reference index contains a large number of genes.
280+
Set targets = None when using extract_all
281+
extract_all_fast: Extracts all pseudo-aligned reads, defaults to `False`.
282+
Does not break down output by gene/transcript.
283+
Set targets = None when using extract_all_fast
284+
extract_all_unmapped: Extracts all unmapped reads, defaults to `False`.
285+
Set targets = None when using extract_all_unmapped
277286
mm: Also extract reads that multi-mapped to several genes, defaults to `False`
278-
t2g_path: Path to transcript-to-gene mapping file (required when mm = False, target_type = 'gene' (and extract_all_fast and extract_all_unmapped = False), OR extract_all = True)
287+
t2g_path: Path to transcript-to-gene mapping file
288+
(required when mm = False, target_type = 'gene'
289+
(and extract_all_fast and extract_all_unmapped = False),
290+
OR extract_all = True)
279291
temp_dir: Path to temporary directory, defaults to `tmp`
280292
threads: Number of threads to use, defaults to `8`
281293
aa: Align to index generated from a FASTA-file containing amino acid sequences, defaults to `False`
@@ -287,19 +299,21 @@ def extract(
287299
"""
288300
if sum([extract_all, extract_all_fast, extract_all_unmapped]) > 1:
289301
raise ValueError(
290-
f"extract_all, extract_all_fast, and/or extract_all_unmapped cannot be used simultaneously"
302+
"extract_all, extract_all_fast, and/or extract_all_unmapped cannot be used simultaneously"
291303
)
292304

293305
if targets is None and not (
294306
extract_all or extract_all_fast or extract_all_unmapped
295307
):
296308
raise ValueError(
297-
f"targets must be provided (unless extract_all, extract_all_fast, or extract_all_unmapped are used to extract all reads)"
309+
"targets must be provided "
310+
"(unless extract_all, extract_all_fast, or extract_all_unmapped are used to extract all reads)"
298311
)
299312

300313
if targets and (extract_all or extract_all_fast or extract_all_unmapped):
301314
logger.warning(
302-
f"targets will be ignored since extract_all, extract_all_fast, or extract_all_unmapped is activated which will extract all reads"
315+
"targets will be ignored since extract_all, extract_all_fast, or extract_all_unmapped "
316+
"is activated which will extract all reads"
303317
)
304318

305319
if target_type not in ["gene", "transcript"]:
@@ -313,14 +327,16 @@ def extract(
313327
or extract_all
314328
) and (t2g_path is None):
315329
raise ValueError(
316-
"t2g_path must be provided if mm flag is not provided, target_type is 'gene' (and extract_all_fast and extract_all_unmapped are False), OR extract_all is True"
330+
"t2g_path must be provided if mm flag is not provided, target_type is 'gene' "
331+
"(and extract_all_fast and extract_all_unmapped are False), OR extract_all is True"
317332
)
318333

319334
# extract_all_unmapped requires bustools version > 0.43.2 since previous versions have a bug in the output fastq format that changes the sequence headers
320335
bustools_version_tuple = get_bustools_version()
321336
if extract_all_unmapped and not (0, 43, 2) < bustools_version_tuple:
322337
raise ValueError(
323-
f"extract_all_unmapped requires bustools version > 0.43.2. You are currently using bustools version {'.'.join(str(i) for i in bustools_version_tuple)}."
338+
f"extract_all_unmapped requires bustools version > 0.43.2. "
# Each literal in an implicit concatenation needs its own f-prefix; without it the version is not shown.
f"You are currently using bustools version {'.'.join(str(i) for i in bustools_version_tuple)}."
324340
)
325341

326342
make_directory(out_dir)
@@ -379,7 +395,11 @@ def extract(
379395
# Save unmapped reads in a separate fastq file
380396
unmapped_fastq = os.path.join(out_dir, "all_unmapped/1.fastq.gz")
381397
mapped_fastq = os.path.join(extract_out_folder, "1.fastq.gz")
382-
extract_matching_reads_by_header(mapped_fastq, fastq[0] if isinstance(fastq, list) else fastq, unmapped_fastq)
398+
extract_matching_reads_by_header(
399+
mapped_fastq,
400+
fastq[0] if isinstance(fastq, list) else fastq,
401+
unmapped_fastq
402+
)
383403

384404
else:
385405
if not mm:

kb_python/main.py

+20-8
Original file line numberDiff line numberDiff line change
@@ -1597,8 +1597,14 @@ def setup_extract_args(
15971597

15981598
parser_extract = parser.add_parser(
15991599
'extract',
1600-
description='Extract sequencing reads that were pseudoaligned to specific genes/transcripts (or extract all reads that were / were not pseudoaligned).',
1601-
help='Extract sequencing reads that were pseudoaligned to specific genes/transcripts (or extract all reads that were / were not pseudoaligned)',
1600+
description=(
1601+
'Extract sequencing reads that were pseudoaligned to specific genes/transcripts '
1602+
'(or extract all reads that were / were not pseudoaligned).'
1603+
),
1604+
help=(
1605+
'Extract sequencing reads that were pseudoaligned to specific genes/transcripts '
1606+
'(or extract all reads that were / were not pseudoaligned)'
1607+
),
16021608
parents=[parent]
16031609
)
16041610
parser_extract._actions[0].help = parser_extract._actions[
@@ -1611,7 +1617,8 @@ def setup_extract_args(
16111617
type=str,
16121618
help=(
16131619
'Single fastq file containing the sequencing reads (e.g. in case of 10x data, provide the R2 file).'
1614-
' Sequencing technology will be treated as bulk here since barcode and UMI tracking is not necessary to extract reads.'
1620+
' Sequencing technology will be treated as bulk here since barcode and UMI tracking '
1621+
'is not necessary to extract reads.'
16151622
)
16161623
)
16171624
required_extract.add_argument(
@@ -1643,16 +1650,19 @@ def setup_extract_args(
16431650
parser_extract.add_argument(
16441651
'--extract_all',
16451652
help=(
1646-
'Extracts all reads that pseudo-aligned to any gene or transcript (as defined by target_type) (breaks down output by gene/transcript). '
1647-
'Using extract_all might take a long time to run when there are a large number of genes/transcripts in the index.'
1653+
'Extracts all reads that pseudo-aligned to any gene or transcript (as defined by target_type) '
1654+
'(breaks down output by gene/transcript). '
1655+
'Using extract_all might take a long time to run when there are a large number of '
1656+
'genes/transcripts in the index.'
16481657
),
16491658
action='store_true',
16501659
default=False
16511660
)
16521661
parser_extract.add_argument(
16531662
'--extract_all_fast',
16541663
help=(
1655-
'Extracts all reads that pseudo-aligned (does not break down output by gene/transcript; output saved in the "all" folder).'
1664+
'Extracts all reads that pseudo-aligned (does not break down output by gene/transcript; '
1665+
'output saved in the "all" folder).'
16561666
),
16571667
action='store_true',
16581668
default=False
@@ -1677,7 +1687,9 @@ def setup_extract_args(
16771687
'-g',
16781688
metavar='T2G',
16791689
help=(
1680-
'Path to transcript-to-gene mapping file (required when mm = False, target_type = "gene" (and extract_all_fast and extract_all_unmapped = False), OR extract_all = True).'
1690+
'Path to transcript-to-gene mapping file '
1691+
'(required when mm = False, target_type = "gene" '
1692+
'(and extract_all_fast and extract_all_unmapped = False), OR extract_all = True).'
16811693
),
16821694
type=str,
16831695
)
@@ -1837,7 +1849,7 @@ def main():
18371849
# Set binary paths
18381850
if args.command in ('ref', 'count', 'extract') and ('dry_run' not in args
18391851
or not args.dry_run):
1840-
1852+
18411853
use_kmer64 = False
18421854
opt_off = False
18431855
if args.k and args.k > 32:

kb_python/utils.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -721,7 +721,7 @@ def overlay_anndatas(
721721
ambiguous_intersection = adata_ambiguous[obs_idx][:, var_idx]
722722
a_layers.update({'ambiguous': ambiguous_intersection.X})
723723
sum_X = sum_X + ambiguous_intersection.X
724-
724+
725725
df_obs = unspliced_intersection.obs
726726
df_var = unspliced_intersection.var
727727
return anndata.AnnData(

0 commit comments

Comments
 (0)