Skip to content

Commit

Permalink
Merge pull request #67 from pconesa/master
Browse files Browse the repository at this point in the history
entry point for picking
  • Loading branch information
xulabs authored Jan 23, 2020
2 parents 11598c7 + 923955b commit 45bfb01
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 1 deletion.
Empty file added aitom/cmd/__init__.py
Empty file.
126 changes: 126 additions & 0 deletions aitom/cmd/picking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
'''
a tutorial on using particle picking
Reference:
Pei et al. Simulating cryo electron tomograms of crowded cell cytoplasm for assessment of automated particle picking
https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-016-1283-3
'''
import sys

from aitom.pick.dog.particle_picking_dog__util import peak__partition
from aitom.pick.dog.particle_picking_dog__filter import do_filter
import os
import json
import aitom.io.file as io_file
import aitom.image.vol.util as im_vol_util
from bisect import bisect
from pprint import pprint

def picking(path, s1, s2, t, find_maxima=True, partition_op=None, multiprocessing_process_num=0):
'''
parameters:
path:file path s1:sigma1 s2:sigma2 t:threshold level find_maxima:peaks appears at the maximum/minimum multiprocessing_process_num: number of multiporcessing
partition_op: partition the volume for multithreading, is a dict consists 'nonoverlap_width', 'overlap_width' and 'save_vg'
# Take a two-dimensional image as an example, if the image size is 210*150(all in pixels), nonoverlap_width is 60 and overlap_width is 30.
# It will be divided into 6 pieces for different threads to process. The ranges of their X and Y are
# (first line) (0-90)*(0-90) (60-150)*(0-90) (120-210)*(0-90) (0-90)
# (second line) (0-90)*(60-150) (60-150)*(60-150) (120-210)*(60-150)
In general, s2=1.1*s1, s1 and t depend on particle size and noise. In practice, s1 should be roughly equal to the particle radius(in pixels). In related paper, the model achieves highest comprehensive score when s1=7 and t=3.
return:
a list including all peaks information (in descending order of value), each element in the return list looks like:
{'val': 281.4873046875, 'x': [1178, 1280, 0], 'uuid': '6ad66107-088c-471e-b65f-0b3b2fdc35b0'}
'val' is the score of the peak when picking, only the score is higher than the threshold will the peak be selected.
'x' is the center of the peak in the tomogram.
'uuid' is an unique id for each peak.
'''
a = io_file.read_mrc_data(path)
print("file has been read")
temp = im_vol_util.cub_img(a)
a_im = temp['im'] # image data
a_vt = temp['vt'] # volume data

# using DoG to detect all peaks, may contain peaks caused by noise
peaks = peak__partition(a_vt, s1=s1, s2=s2, find_maxima=find_maxima, partition_op=partition_op, multiprocessing_process_num=multiprocessing_process_num)

# calculate threshold T and delete peaks whose val are smaller than threshold
# Related paper: Pei L, Xu M, Frazier Z, Alber F. Simulating Cryo-Electron Tomograms of Crowded Mixtures of Macromolecular Complexes and Assessment of Particle Picking. BMC Bioinformatics. 2016; 17: 405.
M = peaks[0]['val'] # max val of all peaks
m = peaks[len(peaks)-1]['val'] # min val of all peaks
T = m+t*(M-m)/20
peak_vals_neg = [-peak['val']*find_maxima for peak in peaks]
res = peaks[:bisect(peak_vals_neg, -T*find_maxima)-1]
assert res[-1]['val'] >= T
print("T=m+t*(M-m)/20 \nT=%f m=%f t=%f M=%f" %(T,m,t,M))
return res

def printUsage():
print("Usage: "
"This script will use aiTom picking to pick a tomogram.\n"
"Invoke this file passing the PATH to a tomogram and the name of the output\n"
"Example: particle_picking.py /tmp/mytomogram.mrc /tmp/coordinates3D.json")

def getParams():

# Download from: https://cmu.box.com/s/9hn3qqtqmivauus3kgtasg5uzlj53wxp
if len(sys.argv) != 3:
printUsage()
raise AttributeError(
"Wrong number of parameters. 2 expected, %s found: %s" % (
len(sys.argv) - 1, sys.argv[1:]))

path = sys.argv[1]

if not os.path.exists(path):
raise FileNotFoundError("File %s does not exists." % path)

output = sys.argv[2]

return path, output

def main():

path, output = getParams()

# Also, we can crop and only use part of the mrc image instead of binning for tasks requiring higher resolution
# crop_path = 'cropped.mrc'
# crop_mrc(path, crop_path)

mrc_header = io_file.read_mrc_header(path)
voxel_spacing_in_nm = mrc_header['MRC']['xlen'] / mrc_header['MRC']['nx'] / 10
print ("voxel_spacing_in_nm: %s" % voxel_spacing_in_nm)

# Note: with our test data, voxel_spacing_in_nm has 0 and next division fails.
sigma1 = 2
try:
sigma1 = max(int(7 / voxel_spacing_in_nm), sigma1) # In general, 7 is optimal sigma1 val in nm according to the paper and sigma1 should at least be 2
except Exception as e:
pass

print('sigma1=%d' %sigma1)
# For particular tomogram, larger sigma1 value may have better results. Use IMOD to display selected peaks and determine best sigma1.
# For 'aitom_demo_cellular_tomogram.mrc', sigma1 is 5 rather than 3 for better performance(in this tomogram, 7nm corresponds to 3.84 pixels)
# print(mrc_header['MRC']['xlen'], mrc_header['MRC']['nx'], voxel_spacing_in_nm, sigma1)

partition_op = {'nonoverlap_width': sigma1*20, 'overlap_width': sigma1*10, 'save_vg': False}
result = picking(path, s1=sigma1, s2=sigma1*1.1, t=3, find_maxima=False, partition_op=partition_op, multiprocessing_process_num=100)
print("%d particles detected, containing redundant peaks" % len(result))
result = do_filter(pp=result, peak_dist_min=sigma1, op=None) # remove redundant peaks
print("peak number reduced to %d" % len(result))
pprint(result[:5])

json_data=[] # generate file for 3dmod
for i in range(len(result)):
loc_np=result[i]['x']
loc=[]
for j in range(len(loc_np)):
loc.append(loc_np[j].tolist())
json_data.append({'peak':{'loc':loc}})

with open(output,'w') as f:
json.dump(json_data,f)


if __name__ == '__main__':
main()

7 changes: 6 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,9 @@ def get_packages(root_dir='aitom', exclude_dir_roots=['aitom/tomominer/core/src'
packages=get_packages(),
package_dir={'aitom': 'aitom',
'aitom.tomominer.core': 'aitom/tomominer/core/', },
cmdclass={'build_ext': build_ext, })
cmdclass={'build_ext': build_ext, },
entry_points={
'console_scripts': [
'picking = aitom.cmd.picking:main',
]}
)

0 comments on commit 45bfb01

Please sign in to comment.