@@ -105,6 +105,11 @@ def kallisto_bus(
105
105
demultiplexed : bool = False ,
106
106
batch_barcodes : bool = False ,
107
107
numreads : int = None ,
108
+ lr : bool = False ,
109
+ lr_thresh : float = 0.8 ,
110
+ lr_error_rate : float = None ,
111
+ union : bool = False ,
112
+ no_jump : bool = False ,
108
113
) -> Dict [str , str ]:
109
114
"""Runs `kallisto bus`.
110
115
@@ -133,6 +138,11 @@ def kallisto_bus(
133
138
demultiplexed: Whether FASTQs are demultiplexed, defaults to `False`
134
139
batch_barcodes: Whether sample ID should be in barcode, defaults to `False`
135
140
numreads: Maximum number of reads to process from supplied input
141
+ lr: Whether to use lr-kallisto in read mapping, defaults to `False`
142
+ lr_thresh: Sets the --threshold for lr-kallisto, defaults to `0.8`
143
+ lr_error_rate: Sets the --error-rate for lr-kallisto, defaults to `None`
144
+ union: Use set union for pseudoalignment, defaults to `False`
145
+ no_jump: Disable pseudoalignment "jumping", defaults to `False`
136
146
137
147
Returns:
138
148
Dictionary containing paths to generated files
@@ -194,6 +204,16 @@ def kallisto_bus(
194
204
command += ['--rf-stranded' ]
195
205
if inleaved :
196
206
command += ['--inleaved' ]
207
+ if lr :
208
+ command += ['--long' ]
209
+ if lr and lr_thresh :
210
+ command += ['-r' , str (lr_thresh )]
211
+ if lr and lr_error_rate :
212
+ command += ['-e' , str (lr_error_rate )]
213
+ if union :
214
+ command += ['--union' ]
215
+ if no_jump :
216
+ command += ['--no-jump' ]
197
217
if batch_barcodes :
198
218
command += ['--batch-barcodes' ]
199
219
if is_batch :
@@ -224,12 +244,14 @@ def kallisto_quant_tcc(
224
244
matrix_to_files : bool = False ,
225
245
matrix_to_directories : bool = False ,
226
246
no_fragment : bool = False ,
247
+ lr : bool = False ,
248
+ lr_platform : str = 'ONT' ,
227
249
) -> Dict [str , str ]:
228
250
"""Runs `kallisto quant-tcc`.
229
251
230
252
Args:
231
253
mtx_path: Path to counts matrix
232
- saved_index_path: Path to index.saved
254
+ saved_index_path: Path to index
233
255
ecmap_path: Path to ecmap
234
256
t2g_path: Path to T2G
235
257
out_dir: Output directory path
@@ -241,6 +263,8 @@ def kallisto_quant_tcc(
241
263
matrix_to_files: Whether to write quant-tcc output to files, defaults to `False`
242
264
matrix_to_directories: Whether to write quant-tcc output to directories, defaults to `False`
243
265
no_fragment: Whether to disable quant-tcc effective length normalization, defaults to `False`
266
+ lr: Whether to use lr-kallisto in quantification, defaults to `False`
267
+ lr_platform: Sets the --platform for lr-kallisto, defaults to `ONT`
244
268
245
269
Returns:
246
270
Dictionary containing path to output files
@@ -255,6 +279,10 @@ def kallisto_quant_tcc(
255
279
command += ['-e' , ecmap_path ]
256
280
command += ['-g' , t2g_path ]
257
281
command += ['-t' , threads ]
282
+ if lr :
283
+ command += ['--long' ]
284
+ if lr and lr_platform :
285
+ command += ['-P' , lr_platform ]
258
286
if flens_path and not no_fragment :
259
287
command += ['-f' , flens_path ]
260
288
if l and not no_fragment :
@@ -1178,6 +1206,14 @@ def count(
1178
1206
no_fragment : bool = False ,
1179
1207
numreads : int = None ,
1180
1208
store_num : bool = False ,
1209
+ lr : bool = False ,
1210
+ lr_thresh : float = 0.8 ,
1211
+ lr_error_rate : float = None ,
1212
+ lr_platform : str = 'ONT' ,
1213
+ union : bool = False ,
1214
+ no_jump : bool = False ,
1215
+ quant_umis : bool = False ,
1216
+ keep_flags : bool = False ,
1181
1217
) -> Dict [str , Union [str , Dict [str , str ]]]:
1182
1218
"""Generates count matrices for single-cell RNA seq.
1183
1219
@@ -1242,6 +1278,14 @@ def count(
1242
1278
no_fragment: Whether to disable quant-tcc effective length normalization, defaults to `False`
1243
1279
numreads: Maximum number of reads to process from supplied input
1244
1280
store_num: Whether to store read numbers in BUS file, defaults to `False`
1281
+ lr: Whether to use lr-kallisto in read mapping, defaults to `False`
1282
+ lr_thresh: Sets the --threshold for lr-kallisto, defaults to `0.8`
1283
+ lr_error_rate: Sets the --error-rate for lr-kallisto, defaults to `None`
1284
+ lr_platform: Sets the --platform for lr-kallisto, defaults to `ONT`
1285
+ union: Use set union for pseudoalignment, defaults to `False`
1286
+ no_jump: Disable pseudoalignment "jumping", defaults to `False`
1287
+ quant_umis: Whether to run quant-tcc when there are UMIs, defaults to `False`
1288
+ keep_flags: Preserve flag column when sorting BUS file, defaults to `False`
1245
1289
1246
1290
Returns:
1247
1291
Dictionary containing paths to generated files
@@ -1292,7 +1336,12 @@ def count(
1292
1336
demultiplexed = demultiplexed ,
1293
1337
batch_barcodes = batch_barcodes ,
1294
1338
numreads = numreads ,
1295
- n = store_num
1339
+ n = store_num ,
1340
+ lr = lr ,
1341
+ lr_thresh = lr_thresh ,
1342
+ lr_error_rate = lr_error_rate ,
1343
+ union = union ,
1344
+ no_jump = no_jump
1296
1345
)
1297
1346
else :
1298
1347
logger .info (
@@ -1309,7 +1358,7 @@ def count(
1309
1358
temp_dir = temp_dir ,
1310
1359
threads = threads ,
1311
1360
memory = memory ,
1312
- store_num = store_num
1361
+ store_num = store_num and not keep_flags
1313
1362
)
1314
1363
correct = True
1315
1364
if whitelist_path and whitelist_path .upper () == "NONE" :
@@ -1404,6 +1453,9 @@ def update_results_with_suffix(current_results, new_results, suffix):
1404
1453
technology .upper () in ('BULK' , 'SMARTSEQ2' , 'SMARTSEQ3' )
1405
1454
) or ignore_umis
1406
1455
quant = cm and tcc
1456
+ if quant_umis :
1457
+ quant = True
1458
+ no_fragment = True
1407
1459
suffix_to_inspect_filename = {'' : '' }
1408
1460
if (technology .upper () == 'SMARTSEQ3' ):
1409
1461
suffix_to_inspect_filename = {
@@ -1518,6 +1570,8 @@ def update_results_with_suffix(current_results, new_results, suffix):
1518
1570
matrix_to_files = matrix_to_files ,
1519
1571
matrix_to_directories = matrix_to_directories ,
1520
1572
no_fragment = no_fragment ,
1573
+ lr = lr ,
1574
+ lr_platform = lr_platform ,
1521
1575
)
1522
1576
update_results_with_suffix (
1523
1577
unfiltered_results , quant_result , suffix
@@ -1695,6 +1749,14 @@ def count_nac(
1695
1749
batch_barcodes : bool = False ,
1696
1750
numreads : int = None ,
1697
1751
store_num : bool = False ,
1752
+ lr : bool = False ,
1753
+ lr_thresh : float = 0.8 ,
1754
+ lr_error_rate : float = None ,
1755
+ lr_platform : str = 'ONT' ,
1756
+ union : bool = False ,
1757
+ no_jump : bool = False ,
1758
+ quant_umis : bool = False ,
1759
+ keep_flags : bool = False ,
1698
1760
) -> Dict [str , Union [Dict [str , str ], str ]]:
1699
1761
"""Generates RNA velocity matrices for single-cell RNA seq.
1700
1762
@@ -1756,6 +1818,14 @@ def count_nac(
1756
1818
batch_barcodes: Whether sample ID should be in barcode, defaults to `False`
1757
1819
numreads: Maximum number of reads to process from supplied input
1758
1820
store_num: Whether to store read numbers in BUS file, defaults to `False`
1821
+ lr: Whether to use lr-kallisto in read mapping, defaults to `False`
1822
+ lr_thresh: Sets the --threshold for lr-kallisto, defaults to `0.8`
1823
+ lr_error_rate: Sets the --error-rate for lr-kallisto, defaults to `None`
1824
+ lr_platform: Sets the --platform for lr-kallisto, defaults to `ONT`
1825
+ union: Use set union for pseudoalignment, defaults to `False`
1826
+ no_jump: Disable pseudoalignment "jumping", defaults to `False`
1827
+ quant_umis: Whether to run quant-tcc when there are UMIs, defaults to `False`
1828
+ keep_flags: Preserve flag column when sorting BUS file, defaults to `False`
1759
1829
1760
1830
Returns:
1761
1831
Dictionary containing paths to generated files
@@ -1803,7 +1873,12 @@ def count_nac(
1803
1873
demultiplexed = demultiplexed ,
1804
1874
batch_barcodes = batch_barcodes ,
1805
1875
numreads = numreads ,
1806
- n = store_num
1876
+ n = store_num ,
1877
+ lr = lr ,
1878
+ lr_thresh = lr_thresh ,
1879
+ lr_error_rate = lr_error_rate ,
1880
+ union = union ,
1881
+ no_jump = no_jump
1807
1882
)
1808
1883
else :
1809
1884
logger .info (
@@ -1820,7 +1895,7 @@ def count_nac(
1820
1895
temp_dir = temp_dir ,
1821
1896
threads = threads ,
1822
1897
memory = memory ,
1823
- store_num = store_num
1898
+ store_num = store_num and not keep_flags
1824
1899
)
1825
1900
correct = True
1826
1901
if whitelist_path and whitelist_path .upper () == "NONE" :
@@ -2073,8 +2148,8 @@ def update_results_with_suffix(current_results, new_results, suffix):
2073
2148
if batch_barcodes else None for prefix in prefixes
2074
2149
],
2075
2150
genes_paths = [
2076
- unfiltered_results [prefix ][f'txnames { suffix } ' ] if tcc
2077
- else unfiltered_results [prefix ].get (f'genes{ suffix } ' )
2151
+ unfiltered_results [prefix ][f'ec { suffix } ' ] if tcc else
2152
+ unfiltered_results [prefix ].get (f'genes{ suffix } ' )
2078
2153
for prefix in prefixes
2079
2154
],
2080
2155
t2g_path = t2g_path ,
@@ -2975,7 +3050,7 @@ def update_results_with_suffix(current_results, new_results, suffix):
2975
3050
for prefix in prefixes
2976
3051
],
2977
3052
genes_paths = [
2978
- unfiltered_results [prefix ][f'txnames { suffix } ' ] if tcc else
3053
+ unfiltered_results [prefix ][f'ec { suffix } ' ] if tcc else
2979
3054
unfiltered_results [prefix ].get (f'genes{ suffix } ' )
2980
3055
for prefix in prefixes
2981
3056
],
0 commit comments