From 923955b5e4426e57339c4a7ed9b5a13185e4124c Mon Sep 17 00:00:00 2001
From: pconesa <p.conesa.mingo@gmail.com>
Date: Thu, 5 Dec 2019 10:20:19 +0100
Subject: [PATCH] entry point for picking

---
 aitom/cmd/__init__.py |   0
 aitom/cmd/picking.py  | 126 ++++++++++++++++++++++++++++++++++++++++++
 setup.py              |   7 ++-
 3 files changed, 132 insertions(+), 1 deletion(-)
 create mode 100644 aitom/cmd/__init__.py
 create mode 100644 aitom/cmd/picking.py

diff --git a/aitom/cmd/__init__.py b/aitom/cmd/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/aitom/cmd/picking.py b/aitom/cmd/picking.py
new file mode 100644
index 0000000..58322ff
--- /dev/null
+++ b/aitom/cmd/picking.py
@@ -0,0 +1,126 @@
+'''
+Command line entry point for particle picking (based on the particle picking tutorial)
+
+Reference:
+Pei et al. Simulating cryo electron tomograms of crowded cell cytoplasm for assessment of automated particle picking
+https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-016-1283-3
+'''
+import sys
+
+from aitom.pick.dog.particle_picking_dog__util import peak__partition
+from aitom.pick.dog.particle_picking_dog__filter import do_filter
+import os
+import json
+import aitom.io.file as io_file
+import aitom.image.vol.util as im_vol_util
+from bisect import bisect
+from pprint import pprint
+
+def picking(path, s1, s2, t, find_maxima=True, partition_op=None, multiprocessing_process_num=0):
+    '''
+    Detect particle peaks in a tomogram with a DoG (Difference of Gaussians) filter and keep the strong ones.
+
+    parameters:
+    path: path of the MRC file  s1: sigma1  s2: sigma2  t: threshold level, in twentieths of the peak value range
+    find_maxima: True if particles are maxima of the filtered volume, False if they are minima
+    multiprocessing_process_num: number of processes, forwarded to peak__partition
+    partition_op: how to split the volume for parallel processing, a dict with the keys 'nonoverlap_width',
+    'overlap_width' and 'save_vg'. For a 210*150 pixel image with nonoverlap_width=60 and overlap_width=30 the
+    X*Y ranges of the 6 pieces are (0-90)*(0-90) (60-150)*(0-90) (120-210)*(0-90) and
+    (0-90)*(60-150) (60-150)*(60-150) (120-210)*(60-150).
+    In general s2=1.1*s1, while s1 and t depend on particle size and noise; s1 should be roughly the particle
+    radius in pixels. In the related paper the best comprehensive score is reached with s1=7 and t=3.
+
+    return:
+    a list with the peaks that passed the threshold, in the order produced by peak__partition, each looking like
+    {'val': 281.4873046875, 'x': [1178, 1280, 0], 'uuid': '6ad66107-088c-471e-b65f-0b3b2fdc35b0'}
+    'val' is the score of the peak, 'x' its center in the tomogram and 'uuid' a unique id.
+    An empty list is returned when no peak was detected at all.
+    '''
+    a = io_file.read_mrc_data(path)
+    print("file has been read")
+    a_vt = im_vol_util.cub_img(a)['vt'] # volume data
+    # using DoG to detect all peaks, may contain peaks caused by noise
+    peaks = peak__partition(a_vt, s1=s1, s2=s2, find_maxima=find_maxima, partition_op=partition_op, multiprocessing_process_num=multiprocessing_process_num)
+    if not peaks:
+        return []
+
+    # Threshold from Pei L, Xu M, Frazier Z, Alber F. BMC Bioinformatics. 2016; 17: 405.
+    vals = [peak['val'] for peak in peaks]
+    M, m = (max(vals), min(vals)) if find_maxima else (min(vals), max(vals)) # strongest and weakest peak
+    T = m+t*(M-m)/20
+    # compare with T directly: multiplying by the bool find_maxima zeroed every key when picking minima
+    res = [peak for peak in peaks if (peak['val'] >= T if find_maxima else peak['val'] <= T)]
+    print("T=m+t*(M-m)/20 \nT=%f m=%f t=%f M=%f" %(T,m,t,M))
+    return res
+
+def printUsage():
+    '''Print a short help text; the example uses the 'picking' console script declared in setup.py.'''
+    print("Usage: This script will use aiTom picking to pick a tomogram.\n"
+          "Invoke this file passing the PATH to a tomogram and the name of the output\n"
+          "Example: picking /tmp/mytomogram.mrc /tmp/coordinates3D.json")
+
+def getParams():
+    '''
+    Read the tomogram path and the output file name from the command line.
+
+    return: the tuple (path, output) taken from sys.argv[1] and sys.argv[2]
+    raises: AttributeError (after printing the usage) unless exactly 2 arguments are given,
+            FileNotFoundError if the tomogram path does not exist
+    '''
+    # Download from: https://cmu.box.com/s/9hn3qqtqmivauus3kgtasg5uzlj53wxp
+    argv = sys.argv
+    if len(argv) != 3:
+        printUsage()
+        raise AttributeError("Wrong number of parameters. 2 expected, %s found: %s" % (len(argv) - 1, argv[1:]))
+    _, path, output = argv
+    if not os.path.exists(path):
+        raise FileNotFoundError("File %s does not exists." % path)
+    return path, output
+
+def main():
+    '''
+    Console entry point: pick particles in the tomogram given on the command line and write them as JSON.
+
+    The output file holds a list of {'peak': {'loc': [...]}} entries, one per peak left after filtering.
+    '''
+    path, output = getParams()
+
+    # Also, we can crop and only use part of the mrc image instead of binning for tasks requiring higher resolution
+    # crop_path = 'cropped.mrc'
+    # crop_mrc(path, crop_path)
+
+    mrc_header = io_file.read_mrc_header(path)
+    voxel_spacing_in_nm = mrc_header['MRC']['xlen'] / mrc_header['MRC']['nx'] / 10
+    print ("voxel_spacing_in_nm: %s" % voxel_spacing_in_nm)
+
+    # In general, 7 is optimal sigma1 val in nm according to the paper and sigma1 should at least be 2.
+    # Some headers (e.g. our test data) give a voxel spacing of 0, which makes the division or the int
+    # conversion fail; only those arithmetic failures are tolerated, any other error is allowed to surface.
+    sigma1 = 2
+    try:
+        sigma1 = max(int(7 / voxel_spacing_in_nm), sigma1)
+    except (ZeroDivisionError, OverflowError, ValueError):
+        pass  # unusable voxel spacing: keep the minimum sigma1
+
+    print('sigma1=%d' %sigma1)
+    # For particular tomogram, larger sigma1 value may have better results. Use IMOD to display selected peaks and determine best sigma1.
+    # For 'aitom_demo_cellular_tomogram.mrc', sigma1 is 5 rather than 3 for better performance(in this tomogram, 7nm corresponds to 3.84 pixels)
+
+    partition_op = {'nonoverlap_width': sigma1*20, 'overlap_width': sigma1*10, 'save_vg': False}
+    result = picking(path, s1=sigma1, s2=sigma1*1.1, t=3, find_maxima=False, partition_op=partition_op, multiprocessing_process_num=100)
+    print("%d particles detected, containing redundant peaks" % len(result))
+    result = do_filter(pp=result, peak_dist_min=sigma1, op=None)  # remove redundant peaks
+    print("peak number reduced to %d" % len(result))
+    pprint(result[:5])
+
+    # generate file for 3dmod; each coordinate goes through tolist() (presumably numpy scalars, which json cannot serialize)
+    json_data = [{'peak': {'loc': [coord.tolist() for coord in peak['x']]}} for peak in result]
+
+    with open(output,'w') as f:
+        json.dump(json_data,f)
+
+    
+if __name__ == '__main__':  # run main() when the module is executed directly as a script
+    main()
+
diff --git a/setup.py b/setup.py
index 38871f0..e8ae72e 100644
--- a/setup.py
+++ b/setup.py
@@ -71,4 +71,9 @@ def get_packages(root_dir='aitom', exclude_dir_roots=['aitom/tomominer/core/src'
       packages=get_packages(),
       package_dir={'aitom': 'aitom',
                    'aitom.tomominer.core': 'aitom/tomominer/core/', },
-      cmdclass={'build_ext': build_ext, })
+      cmdclass={'build_ext': build_ext, },
+      entry_points={
+          'console_scripts': [
+              'picking = aitom.cmd.picking:main',
+          ]}
+      )