You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
f"matrix.ec file where transcript entries were replaced with -1 for equivalence classes that map to multiple genes saved at {ecmap_no_mm}"
241
+
f"matrix.ec file where transcript entries were replaced with -1 for "
242
+
f"equivalence classes that map to multiple genes saved at {ecmap_no_mm}"
239
243
)
240
244
241
245
return ecmap_no_mm
@@ -271,11 +275,19 @@ def extract(
271
275
targets: Gene or transcript names for which to extract the raw reads that align to the index
272
276
out_dir: Path to output directory
273
277
target_type: 'gene' (default) or 'transcript' -> Defines whether targets are gene or transcript names
274
-
extract_all: Extracts reads for all genes or transcripts (as defined in target_type), defaults to `False`. Might take a long time to run when the reference index contains a large number of genes. Set targets = None when using extract_all
275
-
extract_all_fast: Extracts all pseudo-aligned reads, defaults to `False`. Does not break down output by gene/transcript. Set targets = None when using extract_all_fast
276
-
extract_all_unmapped: Extracts all unmapped reads, defaults to `False`. Set targets = None when using extract_all_unmapped
278
+
extract_all: Extracts reads for all genes or transcripts (as defined in target_type), defaults to `False`.
279
+
Might take a long time to run when the reference index contains a large number of genes.
280
+
Set targets = None when using extract_all
281
+
extract_all_fast: Extracts all pseudo-aligned reads, defaults to `False`.
282
+
Does not break down output by gene/transcript.
283
+
Set targets = None when using extract_all_fast
284
+
extract_all_unmapped: Extracts all unmapped reads, defaults to `False`.
285
+
Set targets = None when using extract_all_unmapped
277
286
mm: Also extract reads that multi-mapped to several genes, defaults to `False`
278
-
t2g_path: Path to transcript-to-gene mapping file (required when mm = False, target_type = 'gene' (and extract_all_fast and extract_all_unmapped = False), OR extract_all = True)
287
+
t2g_path: Path to transcript-to-gene mapping file
288
+
(required when mm = False, target_type = 'gene'
289
+
(and extract_all_fast and extract_all_unmapped = False),
290
+
OR extract_all = True)
279
291
temp_dir: Path to temporary directory, defaults to `tmp`
280
292
threads: Number of threads to use, defaults to `8`
281
293
aa: Align to index generated from a FASTA-file containing amino acid sequences, defaults to `False`
f"targets will be ignored since extract_all, extract_all_fast, or extract_all_unmapped is activated which will extract all reads"
315
+
"targets will be ignored since extract_all, extract_all_fast, or extract_all_unmapped "
316
+
"is activated which will extract all reads"
303
317
)
304
318
305
319
if target_type not in ["gene", "transcript"]:
@@ -313,14 +327,16 @@ def extract(
313
327
or extract_all
314
328
) and (t2g_path is None):
315
329
raise ValueError(
316
-
"t2g_path must be provided if mm flag is not provided, target_type is 'gene' (and extract_all_fast and extract_all_unmapped are False), OR extract_all is True"
330
+
"t2g_path must be provided if mm flag is not provided, target_type is 'gene' "
331
+
"(and extract_all_fast and extract_all_unmapped are False), OR extract_all is True"
317
332
)
318
333
319
334
# extract_all_unmapped requires bustools version > 0.43.2 since previous versions have a bug in the output fastq format that changes the sequence headers
Copy file name to clipboardexpand all lines: kb_python/main.py
+20-8
Original file line number
Diff line number
Diff line change
@@ -1597,8 +1597,14 @@ def setup_extract_args(
1597
1597
1598
1598
parser_extract=parser.add_parser(
1599
1599
'extract',
1600
-
description='Extract sequencing reads that were pseudoaligned to specific genes/transcripts (or extract all reads that were / were not pseudoaligned).',
1601
-
help='Extract sequencing reads that were pseudoaligned to specific genes/transcripts (or extract all reads that were / were not pseudoaligned)',
1600
+
description=(
1601
+
'Extract sequencing reads that were pseudoaligned to specific genes/transcripts '
1602
+
'(or extract all reads that were / were not pseudoaligned).'
1603
+
),
1604
+
help=(
1605
+
'Extract sequencing reads that were pseudoaligned to specific genes/transcripts '
1606
+
'(or extract all reads that were / were not pseudoaligned)'
'Single fastq file containing the sequencing reads (e.g. in case of 10x data, provide the R2 file).'
1614
-
' Sequencing technology will be treated as bulk here since barcode and UMI tracking is not necessary to extract reads.'
1620
+
' Sequencing technology will be treated as bulk here since barcode and UMI tracking '
1621
+
'is not necessary to extract reads.'
1615
1622
)
1616
1623
)
1617
1624
required_extract.add_argument(
@@ -1643,16 +1650,19 @@ def setup_extract_args(
1643
1650
parser_extract.add_argument(
1644
1651
'--extract_all',
1645
1652
help=(
1646
-
'Extracts all reads that pseudo-aligned to any gene or transcript (as defined by target_type) (breaks down output by gene/transcript). '
1647
-
'Using extract_all might take a long time to run when there are a large number of genes/transcripts in the index.'
1653
+
'Extracts all reads that pseudo-aligned to any gene or transcript (as defined by target_type) '
1654
+
'(breaks down output by gene/transcript). '
1655
+
'Using extract_all might take a long time to run when there are a large number of '
1656
+
'genes/transcripts in the index.'
1648
1657
),
1649
1658
action='store_true',
1650
1659
default=False
1651
1660
)
1652
1661
parser_extract.add_argument(
1653
1662
'--extract_all_fast',
1654
1663
help=(
1655
-
'Extracts all reads that pseudo-aligned (does not break down output by gene/transcript; output saved in the "all" folder).'
1664
+
'Extracts all reads that pseudo-aligned (does not break down output by gene/transcript; '
1665
+
'output saved in the "all" folder).'
1656
1666
),
1657
1667
action='store_true',
1658
1668
default=False
@@ -1677,7 +1687,9 @@ def setup_extract_args(
1677
1687
'-g',
1678
1688
metavar='T2G',
1679
1689
help=(
1680
-
'Path to transcript-to-gene mapping file (required when mm = False, target_type = "gene" (and extract_all_fast and extract_all_unmapped = False), OR extract_all = True).'
1690
+
'Path to transcript-to-gene mapping file '
1691
+
'(required when mm = False, target_type = "gene" '
1692
+
'(and extract_all_fast and extract_all_unmapped = False), OR extract_all = True).'
1681
1693
),
1682
1694
type=str,
1683
1695
)
@@ -1837,7 +1849,7 @@ def main():
1837
1849
# Set binary paths
1838
1850
if args.command in ('ref', 'count', 'extract') and ('dry_run' not in args
0 commit comments