From b59b99515ef21db0a2370abfbfd1b0d800cf599f Mon Sep 17 00:00:00 2001 From: Han Zhu Date: Tue, 23 Jul 2024 19:17:24 -0400 Subject: [PATCH] Update to suppress mean zero noise only --- .gitignore | 3 ++- README.md | 29 +++++++++++++++++++++++++---- gmm_contour.py | 47 ++++++++++++++++++++++++++++------------------- 3 files changed, 55 insertions(+), 24 deletions(-) diff --git a/.gitignore b/.gitignore index d46dcf4..cb4e637 100644 --- a/.gitignore +++ b/.gitignore @@ -160,4 +160,5 @@ cython_debug/ #.idea/ CryoREAD_Predict_Result/ -output_test/ \ No newline at end of file +output_test/ +examples/ \ No newline at end of file diff --git a/README.md b/README.md index 34bd408..88b6c56 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,46 @@ # AutoClass3D -A deep learning based tool to automatically select the best reconstructed 3D maps within a group of maps. +A deep learning based tool to automatically select the best reconstructed 3D maps within a group of maps. ## Installation + clone the repository: + ``` git clone github.itap.purdue.edu/kiharalab/AutoClass3D ``` + create conda environment: + ``` conda env create -f environment.yml ``` -## Arguments (All required) +## Arguments for Class3D/InitialModel Selection (All required) + ``` -F: Class3D MRC files to be examine, separated by space -G: The GPU ID to use for the computation, use comma to seperate multiple GPUs -J: The Job Name ``` -## Example +## Example for Class3D/InitialModel Selection + ``` python main.py -F ./Class3D/job052/class1.mrc ./Class3D/job052/class2.mrc ./Class3D/job052/class3.mrc -G 0,1,2 -J job052_select -``` \ No newline at end of file +``` + +## Arguments for Auto Contouring + +``` +-i: Input MRC map file to determine the contour +-o: Output folder to store all the files +-p: Plot all components (Optional, False by default) +-n: Number of initializations (Optional, 3 by default) +``` + +## Example for Auto Contouring + +``` +python gmm_contour.py -i ./Class3D/job052/class1.mrc -o ./output_folder -p 
+``` diff --git a/gmm_contour.py b/gmm_contour.py index ec5450a..6db66bc 100644 --- a/gmm_contour.py +++ b/gmm_contour.py @@ -122,17 +122,20 @@ def gmm_mask(input_map_path, output_folder, num_components=3, use_grad=False, n_ # plot mean # mean = g.means_[pred, 0] # ax.axvline(mean, label=f"Mean_{pred}") + ax.legend(loc="upper right") fig.tight_layout() - print("Saving figure to", os.path.join(output_folder, "hist_by_component.png")) - fig.savefig(os.path.join(output_folder, "hist_by_component.png")) + # print("Saving figure to", os.path.join(output_folder, "hist_by_component.png")) + fig.savefig(os.path.join(output_folder, Path(input_map_path).stem + "_hist_by_components.png")) # generate a mask to keep only the component with the largest variance mask = np.zeros_like(map_data) # mask[np.nonzero(masked_prot_data)] = (preds == np.argmax(g.means_[:, 0].flatten())) - ind = np.argpartition(g.means_[:, 0].flatten(), -3)[-3:] + # ind = np.argpartition(g.means_[:, 0].flatten(), -3)[-3:] + # choose ind that is closest to 0 + ind = np.argmin(np.abs(g.means_[:, 0].flatten())) - print("ind", ind) + print("ind to remove", ind) # mask[np.nonzero(map_data)] = preds in ind print( @@ -142,15 +145,20 @@ def gmm_mask(input_map_path, output_folder, num_components=3, use_grad=False, n_ ) print("Variances: ", g.covariances_.shape, g.covariances_[:, 0, 0]) - mask[np.nonzero(map_data)] = (preds == ind[0]) | (preds == ind[1]) | (preds == ind[2]) + # mask[np.nonzero(map_data)] = (preds == ind[0]) | (preds == ind[1]) | (preds == ind[2]) + mask[np.nonzero(map_data)] = (preds != ind) - print("Nonzero", np.count_nonzero(mask)) + noise_comp = map_data[np.nonzero(map_data)][preds == ind] + # 98 percentile + # revised_contour = np.percentile(noise_comp, 98) + revised_contour = np.max(noise_comp) + + print("Revised contour", revised_contour) + + print("Remaining mask region size in voxels", np.count_nonzero(mask)) # use opening to remove small artifacts - mask = opening(mask.astype(bool)) - # 
gaussian_mask = gaussian(mask.astype(float), sigma=3, preserve_range=True) - # mask = np.clip(gaussian_mask + mask, 0, 1) - # mask[mask < 1] = mask[mask < 1] / np.max(mask[mask < 1]) + mask = opening(mask.astype(bool), ball(3)) new_data = map_data * mask new_data_non_zero = new_data[np.nonzero(new_data)] @@ -158,15 +166,15 @@ def gmm_mask(input_map_path, output_folder, num_components=3, use_grad=False, n_ save_mrc(input_map_path, new_data, os.path.join(output_folder, Path(input_map_path).stem + "_mask.mrc")) - if use_grad == True: - # use 1 sigma cutoff from the masked data - # revised_contour = np.mean(new_data_non_zero) + np.std(new_data_non_zero) - # use median cutoff from the masked data, could be other percentile - revised_contour = np.percentile(new_data_non_zero, 50) - else: - revised_contour = np.min(new_data[new_data > 1e-8]) + # if use_grad == True: + # # use 1 sigma cutoff from the masked data + # # revised_contour = np.mean(new_data_non_zero) + np.std(new_data_non_zero) + # # use median cutoff from the masked data, could be other percentile + # revised_contour = np.percentile(new_data_non_zero, 50) + # else: + # revised_contour = np.min(new_data[new_data > 1e-8]) - mask_percent = np.count_nonzero(new_data > 1e-6) / np.count_nonzero(map_data > 1e-6) + mask_percent = np.count_nonzero(new_data > 1e-8) / np.count_nonzero(map_data > 1e-8) # plot the histogram fig, ax = plt.subplots(figsize=(10, 2)) @@ -193,6 +201,7 @@ def gmm_mask(input_map_path, output_folder, num_components=3, use_grad=False, n_ parser.add_argument("-i", "--input_map_path", type=str, default=None) parser.add_argument("-o", "--output_folder", type=str, default=None) parser.add_argument("-p", "--plot_all", action="store_true") + parser.add_argument("-n", "--num_components", type=int, default=3) args = parser.parse_args() revised_contour, mask_percent = gmm_mask(input_map_path=args.input_map_path, output_folder=args.output_folder, - num_components=5, use_grad=True, n_init=1, 
plot_all=args.plot_all) + num_components=3, use_grad=True, n_init=3, plot_all=args.plot_all)