From 923955b5e4426e57339c4a7ed9b5a13185e4124c Mon Sep 17 00:00:00 2001 From: pconesa Date: Thu, 5 Dec 2019 10:20:19 +0100 Subject: [PATCH] entry point for picking --- aitom/cmd/__init__.py | 0 aitom/cmd/picking.py | 126 ++++++++++++++++++++++++++++++++++++++++++ setup.py | 7 ++- 3 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 aitom/cmd/__init__.py create mode 100644 aitom/cmd/picking.py diff --git a/aitom/cmd/__init__.py b/aitom/cmd/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/aitom/cmd/picking.py b/aitom/cmd/picking.py new file mode 100644 index 0000000..58322ff --- /dev/null +++ b/aitom/cmd/picking.py @@ -0,0 +1,126 @@ +''' +a tutorial on using particle picking + +Reference: +Pei et al. Simulating cryo electron tomograms of crowded cell cytoplasm for assessment of automated particle picking +https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-016-1283-3 +''' +import sys + +from aitom.pick.dog.particle_picking_dog__util import peak__partition +from aitom.pick.dog.particle_picking_dog__filter import do_filter +import os +import json +import aitom.io.file as io_file +import aitom.image.vol.util as im_vol_util +from bisect import bisect +from pprint import pprint + +def picking(path, s1, s2, t, find_maxima=True, partition_op=None, multiprocessing_process_num=0): + ''' + parameters: + path:file path s1:sigma1 s2:sigma2 t:threshold level find_maxima:peaks appears at the maximum/minimum multiprocessing_process_num: number of multiporcessing + partition_op: partition the volume for multithreading, is a dict consists 'nonoverlap_width', 'overlap_width' and 'save_vg' + # Take a two-dimensional image as an example, if the image size is 210*150(all in pixels), nonoverlap_width is 60 and overlap_width is 30. + # It will be divided into 6 pieces for different threads to process. The ranges of their X and Y are + # (first line) (0-90)*(0-90) (60-150)*(0-90) (120-210)*(0-90) (0-90) + # (second line) (0-90)*(60-150) (60-150)*(60-150) (120-210)*(60-150) + In general, s2=1.1*s1, s1 and t depend on particle size and noise. In practice, s1 should be roughly equal to the particle radius(in pixels). In related paper, the model achieves highest comprehensive score when s1=7 and t=3. + + return: + a list including all peaks information (in descending order of value), each element in the return list looks like: + {'val': 281.4873046875, 'x': [1178, 1280, 0], 'uuid': '6ad66107-088c-471e-b65f-0b3b2fdc35b0'} + 'val' is the score of the peak when picking, only the score is higher than the threshold will the peak be selected. + 'x' is the center of the peak in the tomogram. + 'uuid' is an unique id for each peak. + ''' + a = io_file.read_mrc_data(path) + print("file has been read") + temp = im_vol_util.cub_img(a) + a_im = temp['im'] # image data + a_vt = temp['vt'] # volume data + + # using DoG to detect all peaks, may contain peaks caused by noise + peaks = peak__partition(a_vt, s1=s1, s2=s2, find_maxima=find_maxima, partition_op=partition_op, multiprocessing_process_num=multiprocessing_process_num) + + # calculate threshold T and delete peaks whose val are smaller than threshold + # Related paper: Pei L, Xu M, Frazier Z, Alber F. Simulating Cryo-Electron Tomograms of Crowded Mixtures of Macromolecular Complexes and Assessment of Particle Picking. BMC Bioinformatics. 2016; 17: 405. + M = peaks[0]['val'] # max val of all peaks + m = peaks[len(peaks)-1]['val'] # min val of all peaks + T = m+t*(M-m)/20 + peak_vals_neg = [-peak['val']*find_maxima for peak in peaks] + res = peaks[:bisect(peak_vals_neg, -T*find_maxima)-1] + assert res[-1]['val'] >= T + print("T=m+t*(M-m)/20 \nT=%f m=%f t=%f M=%f" %(T,m,t,M)) + return res + +def printUsage(): + print("Usage: " + "This script will use aiTom picking to pick a tomogram.\n" + "Invoke this file passing the PATH to a tomogram and the name of the output\n" + "Example: particle_picking.py /tmp/mytomogram.mrc /tmp/coordinates3D.json") + +def getParams(): + + # Download from: https://cmu.box.com/s/9hn3qqtqmivauus3kgtasg5uzlj53wxp + if len(sys.argv) != 3: + printUsage() + raise AttributeError( + "Wrong number of parameters. 2 expected, %s found: %s" % ( + len(sys.argv) - 1, sys.argv[1:])) + + path = sys.argv[1] + + if not os.path.exists(path): + raise FileNotFoundError("File %s does not exists." % path) + + output = sys.argv[2] + + return path, output + +def main(): + + path, output = getParams() + + # Also, we can crop and only use part of the mrc image instead of binning for tasks requiring higher resolution + # crop_path = 'cropped.mrc' + # crop_mrc(path, crop_path) + + mrc_header = io_file.read_mrc_header(path) + voxel_spacing_in_nm = mrc_header['MRC']['xlen'] / mrc_header['MRC']['nx'] / 10 + print ("voxel_spacing_in_nm: %s" % voxel_spacing_in_nm) + + # Note: with our test data, voxel_spacing_in_nm has 0 and next division fails. + sigma1 = 2 + try: + sigma1 = max(int(7 / voxel_spacing_in_nm), sigma1) # In general, 7 is optimal sigma1 val in nm according to the paper and sigma1 should at least be 2 + except Exception as e: + pass + + print('sigma1=%d' %sigma1) + # For particular tomogram, larger sigma1 value may have better results. Use IMOD to display selected peaks and determine best sigma1. + # For 'aitom_demo_cellular_tomogram.mrc', sigma1 is 5 rather than 3 for better performance(in this tomogram, 7nm corresponds to 3.84 pixels) + # print(mrc_header['MRC']['xlen'], mrc_header['MRC']['nx'], voxel_spacing_in_nm, sigma1) + + partition_op = {'nonoverlap_width': sigma1*20, 'overlap_width': sigma1*10, 'save_vg': False} + result = picking(path, s1=sigma1, s2=sigma1*1.1, t=3, find_maxima=False, partition_op=partition_op, multiprocessing_process_num=100) + print("%d particles detected, containing redundant peaks" % len(result)) + result = do_filter(pp=result, peak_dist_min=sigma1, op=None) # remove redundant peaks + print("peak number reduced to %d" % len(result)) + pprint(result[:5]) + + json_data=[] # generate file for 3dmod + for i in range(len(result)): + loc_np=result[i]['x'] + loc=[] + for j in range(len(loc_np)): + loc.append(loc_np[j].tolist()) + json_data.append({'peak':{'loc':loc}}) + + with open(output,'w') as f: + json.dump(json_data,f) + + +if __name__ == '__main__': + main() + diff --git a/setup.py b/setup.py index 38871f0..e8ae72e 100644 --- a/setup.py +++ b/setup.py @@ -71,4 +71,9 @@ def get_packages(root_dir='aitom', exclude_dir_roots=['aitom/tomominer/core/src' packages=get_packages(), package_dir={'aitom': 'aitom', 'aitom.tomominer.core': 'aitom/tomominer/core/', }, - cmdclass={'build_ext': build_ext, }) + cmdclass={'build_ext': build_ext, }, + entry_points={ + 'console_scripts': [ + 'picking = aitom.cmd.picking:main', + ]} + )