Update to suppress mean zero noise only

kiharalab · Jul 23, 2024 · b59b995 · b59b995
1 parent 9121897
commit b59b995
Show file tree

Hide file tree

Showing 3 changed files with 55 additions and 24 deletions.
diff --git a/.gitignore b/.gitignore
@@ -160,4 +160,5 @@ cython_debug/
 #.idea/
 
 CryoREAD_Predict_Result/
-output_test/
+output_test/
+examples/
diff --git a/README.md b/README.md
@@ -1,25 +1,46 @@
 # AutoClass3D
-A deep learning based tool to automatically select the best reconstructed 3D maps within a group of maps.
 
+A deep learning based tool to automatically select the best reconstructed 3D maps within a group of maps.
 
 ## Installation
+
 clone the repository:
+
 ```
 git clone github.itap.purdue.edu/kiharalab/AutoClass3D
 ```
+
 create conda environment:
+
 ```
 conda env create -f environment.yml
 ```
 
-## Arguments (All required)
+## Arguments for Class3D/InitialModel Selection (All required)
+
 ```
 -F: Class3D MRC files to be examine, separated by space
 -G: The GPU ID to use for the computation, use comma to seperate multiple GPUs
 -J: The Job Name
 ```
 
-## Example
+## Example for Class3D/InitialModel Selection
+
 ```
 python main.py -F ./Class3D/job052/class1.mrc ./Class3D/job052/class2.mrc ./Class3D/job052/class3.mrc -G 0,1,2 -J job052_select
-```
+```
+
+## Arguments for Auto Contouring
+
+```
+-i: Input MRC map file to determine the contour
+-o: Output folder to store all the files
+-p: Plot all components (Optional, False by default)
+-n: Number of intializations (Optional, 3 by default)
+```
+
+## Example for generating
+
+```
+python gmm_contour.py -i ./Class3D/job052/class1.mrc -o ./output_folder -p
+```
diff --git a/gmm_contour.py b/gmm_contour.py
@@ -122,17 +122,20 @@ def gmm_mask(input_map_path, output_folder, num_components=3, use_grad=False, n_
             # plot mean
             # mean = g.means_[pred, 0]
             # ax.axvline(mean, label=f"Mean_{pred}")
+            ax.legend(loc="upper right")
         fig.tight_layout()
-        print("Saving figure to", os.path.join(output_folder, "hist_by_component.png"))
-        fig.savefig(os.path.join(output_folder, "hist_by_component.png"))
+        # print("Saving figure to", os.path.join(output_folder, "hist_by_component.png"))
+        fig.savefig(os.path.join(output_folder, Path(input_map_path).stem + "_hist_by_components.png"))
 
     # generate a mask to keep only the component with the largest variance
     mask = np.zeros_like(map_data)
     # mask[np.nonzero(masked_prot_data)] = (preds == np.argmax(g.means_[:, 0].flatten()))
 
-    ind = np.argpartition(g.means_[:, 0].flatten(), -3)[-3:]
+    # ind = np.argpartition(g.means_[:, 0].flatten(), -3)[-3:]
+    # choose ind that is closest to 0
+    ind = np.argmin(np.abs(g.means_[:, 0].flatten()))
 
-    print("ind", ind)
+    print("ind to remove", ind)
 
     # mask[np.nonzero(map_data)] = preds in ind
     print(
@@ -142,31 +145,36 @@ def gmm_mask(input_map_path, output_folder, num_components=3, use_grad=False, n_
     )
     print("Variances: ", g.covariances_.shape, g.covariances_[:, 0, 0])
 
-    mask[np.nonzero(map_data)] = (preds == ind[0]) | (preds == ind[1]) | (preds == ind[2])
+    # mask[np.nonzero(map_data)] = (preds == ind[0]) | (preds == ind[1]) | (preds == ind[2])
+    mask[np.nonzero(map_data)] = (preds != ind)
 
-    print("Nonzero", np.count_nonzero(mask))
+    noise_comp = map_data[np.nonzero(map_data)][preds == ind]
+    # 98 percentile
+    # revised_contour = np.percentile(noise_comp, 98)
+    revised_contour = np.max(noise_comp)
+
+    print("Revised contour", revised_contour)
+
+    print("Remaining mask region size in voxels", np.count_nonzero(mask))
 
     # use opening to remove small artifacts
-    mask = opening(mask.astype(bool))
-    # gaussian_mask = gaussian(mask.astype(float), sigma=3, preserve_range=True)
-    # mask = np.clip(gaussian_mask + mask, 0, 1)
-    # mask[mask < 1] = mask[mask < 1] / np.max(mask[mask < 1])
+    mask = opening(mask.astype(bool), ball(3))
     new_data = map_data * mask
     new_data_non_zero = new_data[np.nonzero(new_data)]
 
     # save the new data
     save_mrc(input_map_path, new_data,
              os.path.join(output_folder, Path(input_map_path).stem + "_mask.mrc"))
 
-    if use_grad == True:
-        # use 1 sigma cutoff from the masked data
-        # revised_contour = np.mean(new_data_non_zero) + np.std(new_data_non_zero)
-        # use median cutoff from the masked data, could be other percentile
-        revised_contour = np.percentile(new_data_non_zero, 50)
-    else:
-        revised_contour = np.min(new_data[new_data > 1e-8])
+    # if use_grad == True:
+    #     # use 1 sigma cutoff from the masked data
+    #     # revised_contour = np.mean(new_data_non_zero) + np.std(new_data_non_zero)
+    #     # use median cutoff from the masked data, could be other percentile
+    #     revised_contour = np.percentile(new_data_non_zero, 50)
+    # else:
+    #     revised_contour = np.min(new_data[new_data > 1e-8])
 
-    mask_percent = np.count_nonzero(new_data > 1e-6) / np.count_nonzero(map_data > 1e-6)
+    mask_percent = np.count_nonzero(new_data > 1e-8) / np.count_nonzero(map_data > 1e-8)
 
     # plot the histogram
     fig, ax = plt.subplots(figsize=(10, 2))
@@ -193,6 +201,7 @@ def gmm_mask(input_map_path, output_folder, num_components=3, use_grad=False, n_
     parser.add_argument("-i", "--input_map_path", type=str, default=None)
     parser.add_argument("-o", "--output_folder", type=str, default=None)
     parser.add_argument("-p", "--plot_all", action="store_true")
+    parser.add_argument("-n", "--num_components", type=int, default=3)
     args = parser.parse_args()
     revised_contour, mask_percent = gmm_mask(input_map_path=args.input_map_path, output_folder=args.output_folder,
-                                             num_components=5, use_grad=True, n_init=1, plot_all=args.plot_all)
+                                             num_components=3, use_grad=True, n_init=3, plot_all=args.plot_all)