From 923955b5e4426e57339c4a7ed9b5a13185e4124c Mon Sep 17 00:00:00 2001
From: pconesa
Date: Thu, 5 Dec 2019 10:20:19 +0100
Subject: [PATCH] entry point for picking
---
aitom/cmd/__init__.py | 0
aitom/cmd/picking.py | 126 ++++++++++++++++++++++++++++++++++++++++++
setup.py | 7 ++-
3 files changed, 132 insertions(+), 1 deletion(-)
create mode 100644 aitom/cmd/__init__.py
create mode 100644 aitom/cmd/picking.py
diff --git a/aitom/cmd/__init__.py b/aitom/cmd/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/aitom/cmd/picking.py b/aitom/cmd/picking.py
new file mode 100644
index 0000000..58322ff
--- /dev/null
+++ b/aitom/cmd/picking.py
@@ -0,0 +1,126 @@
+'''
+a tutorial on using particle picking
+
+Reference:
+Pei et al. Simulating cryo electron tomograms of crowded cell cytoplasm for assessment of automated particle picking
+https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-016-1283-3
+'''
+import sys
+
+from aitom.pick.dog.particle_picking_dog__util import peak__partition
+from aitom.pick.dog.particle_picking_dog__filter import do_filter
+import os
+import json
+import aitom.io.file as io_file
+import aitom.image.vol.util as im_vol_util
+from bisect import bisect
+from pprint import pprint
+
+def picking(path, s1, s2, t, find_maxima=True, partition_op=None, multiprocessing_process_num=0):
+ '''
+ parameters:
+ path:file path s1:sigma1 s2:sigma2 t:threshold level find_maxima:peaks appears at the maximum/minimum multiprocessing_process_num: number of multiporcessing
+ partition_op: partition the volume for multithreading, is a dict consists 'nonoverlap_width', 'overlap_width' and 'save_vg'
+ # Take a two-dimensional image as an example, if the image size is 210*150(all in pixels), nonoverlap_width is 60 and overlap_width is 30.
+ # It will be divided into 6 pieces for different threads to process. The ranges of their X and Y are
+ # (first line) (0-90)*(0-90) (60-150)*(0-90) (120-210)*(0-90) (0-90)
+ # (second line) (0-90)*(60-150) (60-150)*(60-150) (120-210)*(60-150)
+ In general, s2=1.1*s1, s1 and t depend on particle size and noise. In practice, s1 should be roughly equal to the particle radius(in pixels). In related paper, the model achieves highest comprehensive score when s1=7 and t=3.
+
+ return:
+ a list including all peaks information (in descending order of value), each element in the return list looks like:
+ {'val': 281.4873046875, 'x': [1178, 1280, 0], 'uuid': '6ad66107-088c-471e-b65f-0b3b2fdc35b0'}
+ 'val' is the score of the peak when picking, only the score is higher than the threshold will the peak be selected.
+ 'x' is the center of the peak in the tomogram.
+ 'uuid' is an unique id for each peak.
+ '''
+ a = io_file.read_mrc_data(path)
+ print("file has been read")
+ temp = im_vol_util.cub_img(a)
+ a_im = temp['im'] # image data
+ a_vt = temp['vt'] # volume data
+
+ # using DoG to detect all peaks, may contain peaks caused by noise
+ peaks = peak__partition(a_vt, s1=s1, s2=s2, find_maxima=find_maxima, partition_op=partition_op, multiprocessing_process_num=multiprocessing_process_num)
+
+ # calculate threshold T and delete peaks whose val are smaller than threshold
+ # Related paper: Pei L, Xu M, Frazier Z, Alber F. Simulating Cryo-Electron Tomograms of Crowded Mixtures of Macromolecular Complexes and Assessment of Particle Picking. BMC Bioinformatics. 2016; 17: 405.
+ M = peaks[0]['val'] # max val of all peaks
+ m = peaks[len(peaks)-1]['val'] # min val of all peaks
+ T = m+t*(M-m)/20
+ peak_vals_neg = [-peak['val']*find_maxima for peak in peaks]
+ res = peaks[:bisect(peak_vals_neg, -T*find_maxima)-1]
+ assert res[-1]['val'] >= T
+ print("T=m+t*(M-m)/20 \nT=%f m=%f t=%f M=%f" %(T,m,t,M))
+ return res
+
+def printUsage():
+ print("Usage: "
+ "This script will use aiTom picking to pick a tomogram.\n"
+ "Invoke this file passing the PATH to a tomogram and the name of the output\n"
+ "Example: particle_picking.py /tmp/mytomogram.mrc /tmp/coordinates3D.json")
+
+def getParams():
+
+ # Download from: https://cmu.box.com/s/9hn3qqtqmivauus3kgtasg5uzlj53wxp
+ if len(sys.argv) != 3:
+ printUsage()
+ raise AttributeError(
+ "Wrong number of parameters. 2 expected, %s found: %s" % (
+ len(sys.argv) - 1, sys.argv[1:]))
+
+ path = sys.argv[1]
+
+ if not os.path.exists(path):
+ raise FileNotFoundError("File %s does not exists." % path)
+
+ output = sys.argv[2]
+
+ return path, output
+
+def main():
+
+ path, output = getParams()
+
+ # Also, we can crop and only use part of the mrc image instead of binning for tasks requiring higher resolution
+ # crop_path = 'cropped.mrc'
+ # crop_mrc(path, crop_path)
+
+ mrc_header = io_file.read_mrc_header(path)
+ voxel_spacing_in_nm = mrc_header['MRC']['xlen'] / mrc_header['MRC']['nx'] / 10
+ print ("voxel_spacing_in_nm: %s" % voxel_spacing_in_nm)
+
+ # Note: with our test data, voxel_spacing_in_nm has 0 and next division fails.
+ sigma1 = 2
+ try:
+ sigma1 = max(int(7 / voxel_spacing_in_nm), sigma1) # In general, 7 is optimal sigma1 val in nm according to the paper and sigma1 should at least be 2
+ except Exception as e:
+ pass
+
+ print('sigma1=%d' %sigma1)
+ # For particular tomogram, larger sigma1 value may have better results. Use IMOD to display selected peaks and determine best sigma1.
+ # For 'aitom_demo_cellular_tomogram.mrc', sigma1 is 5 rather than 3 for better performance(in this tomogram, 7nm corresponds to 3.84 pixels)
+ # print(mrc_header['MRC']['xlen'], mrc_header['MRC']['nx'], voxel_spacing_in_nm, sigma1)
+
+ partition_op = {'nonoverlap_width': sigma1*20, 'overlap_width': sigma1*10, 'save_vg': False}
+ result = picking(path, s1=sigma1, s2=sigma1*1.1, t=3, find_maxima=False, partition_op=partition_op, multiprocessing_process_num=100)
+ print("%d particles detected, containing redundant peaks" % len(result))
+ result = do_filter(pp=result, peak_dist_min=sigma1, op=None) # remove redundant peaks
+ print("peak number reduced to %d" % len(result))
+ pprint(result[:5])
+
+ json_data=[] # generate file for 3dmod
+ for i in range(len(result)):
+ loc_np=result[i]['x']
+ loc=[]
+ for j in range(len(loc_np)):
+ loc.append(loc_np[j].tolist())
+ json_data.append({'peak':{'loc':loc}})
+
+ with open(output,'w') as f:
+ json.dump(json_data,f)
+
+
+if __name__ == '__main__':
+ main()
+
diff --git a/setup.py b/setup.py
index 38871f0..e8ae72e 100644
--- a/setup.py
+++ b/setup.py
@@ -71,4 +71,9 @@ def get_packages(root_dir='aitom', exclude_dir_roots=['aitom/tomominer/core/src'
packages=get_packages(),
package_dir={'aitom': 'aitom',
'aitom.tomominer.core': 'aitom/tomominer/core/', },
- cmdclass={'build_ext': build_ext, })
+ cmdclass={'build_ext': build_ext, },
+ entry_points={
+ 'console_scripts': [
+ 'picking = aitom.cmd.picking:main',
+ ]}
+ )