diff --git a/pyproject.toml b/pyproject.toml
index b030107..d8c1048 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "aposteriori"
-version = "2.3.0"
+version = "2.4.0"
 requires-python = ">= 3.8"
 readme = "README.md"
 dependencies = [
diff --git a/setup.py b/setup.py
index 5cc8e91..c86b401 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name="aposteriori",
-    version="2.3.0",
+    version="2.4.0",
     author="Wells Wood Research Group",
     author_email="chris.wood@ed.ac.uk",
     description="A library for the voxelization of protein structures for protein design.",
diff --git a/src/aposteriori/config.py b/src/aposteriori/config.py
index f3e5806..5ee53f7 100644
--- a/src/aposteriori/config.py
+++ b/src/aposteriori/config.py
@@ -3,7 +3,7 @@
 from ampal.data import ELEMENT_DATA
 
 # Config paths
-MAKE_FRAME_DATASET_VER = "2.3.0"
+MAKE_FRAME_DATASET_VER = "2.4.0"
 PROJECT_ROOT_DIR = pathlib.Path(__file__).parent
 DATA_FOLDER = PROJECT_ROOT_DIR / "data"
 DATA_FOLDER.mkdir(parents=True, exist_ok=True)
diff --git a/src/aposteriori/data_prep/create_frame_data_set.py b/src/aposteriori/data_prep/create_frame_data_set.py
index 531b000..c6ce022 100644
--- a/src/aposteriori/data_prep/create_frame_data_set.py
+++ b/src/aposteriori/data_prep/create_frame_data_set.py
@@ -450,7 +450,7 @@ def convert_atom_to_gaussian_density(
     # Calculate Density:
     voxel_density = np.exp(
         -((vx - x) ** 2 + (vy - y) ** 2 + (vz - z) ** 2)
-        / wanderwaal_radius**2
+        / wanderwaal_radius ** 2
     )
     # Add density to frame:
     gaussian_frame[vy, vx, vz] = voxel_density
@@ -583,12 +583,42 @@ def add_gaussian_at_position(
     return density_frame
 
 
+def charge_polar_property(res: ampal.Residue, codec: Codec):
+    """Return the signed polarity or charge value used to weight a residue's atoms.
+
+    Parameters
+    ----------
+    res : ampal.Residue
+        Residue whose ``mol_letter`` is looked up.
+    codec : Codec
+        Codec whose ``atomic_labels`` select the property: "P" for polarity,
+        "Q" for charge.
+
+    Returns
+    -------
+    res_property
+        With "P": -1 if the Zimmerman polarity of the residue is below 20 and
+        1 otherwise, or 0 when the residue is not a standard amino acid.
+        With "Q" (and no "P"): the ``residue_charge`` entry for the residue.
+        0 when the codec encodes neither property.
+    """
+    if "P" in codec.atomic_labels:
+        if res.mol_letter not in standard_amino_acids:
+            return 0
+        return -1 if polarity_Zimmerman[res.mol_letter] < 20 else 1
+    if "Q" in codec.atomic_labels:
+        # NOTE(review): unlike the polarity branch this lookup is unguarded;
+        # confirm residue_charge covers every residue that can reach here.
+        return residue_charge[res.mol_letter]
+    return 0
+
+
 def create_residue_frame(
     residue: ampal.Residue,
     frame_edge_length: float,
     voxels_per_side: int,
     encode_cb: bool,
-    codec: object,
+    codec: Codec,
     voxels_as_gaussian: bool = False,
 ) -> np.ndarray:
     """Creates a discrete representation of a volume of space around a residue.
@@ -634,15 +664,6 @@ def create_residue_frame(
     voxel_edge_length = frame_edge_length / voxels_per_side
     assembly = residue.parent.parent
     chain = residue.parent
-    if "P" in codec.atomic_labels:
-        if residue.mol_letter in standard_amino_acids.keys():
-            res_property = -1 if polarity_Zimmerman[residue.mol_letter] < 20 else 1
-        else:
-            res_property = 0
-        # res_property = -1 if res_property < 20 else 1
-    elif "Q" in codec.atomic_labels:
-        res_property = residue_charge[residue.mol_letter]
-
     align_to_residue_plane(residue)
 
     frame = np.zeros(
@@ -682,6 +703,7 @@ def create_residue_frame(
         np.testing.assert_array_equal(
             frame[indices], np.array([False] * len(frame[indices]), dtype=bool)
         )
+        res_property = charge_polar_property(res, codec)
         # Encode atoms:
         if voxels_as_gaussian:
             modifiers_triple = calculate_atom_coord_modifier_within_voxel(
@@ -699,11 +721,7 @@ def create_residue_frame(
                 atom_coord=indices,
                 atom_idx=atom_idx,
             )
-            if (
-                "Q" in codec.atomic_labels
-                or "P" in codec.atomic_labels
-                and res_property != 0
-            ):
+            if res_property != 0:
                 gaussian_atom = gaussian_matrix[:, :, :, atom_idx] * float(res_property)
                 # Add at position:
                 frame = add_gaussian_at_position(
@@ -1640,7 +1658,7 @@ def make_frame_dataset(
         print(f"Will attempt to process {total_files} structure file/s.")
         print(f"Output file will be written to `{output_file_path.resolve()}`.")
     voxel_edge_length = frame_edge_length / voxels_per_side
-    max_voxel_distance = np.sqrt(voxel_edge_length**2 * 3)
+    max_voxel_distance = np.sqrt(voxel_edge_length ** 2 * 3)
     print(f"Frame edge length = {frame_edge_length:.2f} A")
     print(f"Voxels per side = {voxels_per_side}")
     print(f"Voxels will have an edge length of {voxel_edge_length:.2f} A.")
diff --git a/tests/test_create_frame_data_set.py b/tests/test_create_frame_data_set.py
index 0616fb1..e7329f2 100644
--- a/tests/test_create_frame_data_set.py
+++ b/tests/test_create_frame_data_set.py
@@ -430,6 +430,11 @@ def test_make_frame_dataset_as_gaussian_cnocacbq():
                 assert np.max(test_residue[:, :, :, 5]) > 0
             if charge < 0:
                 assert np.min(test_residue[:, :, :, 5]) < 0
+            if residue_number in ("32", "33"):
+                assert (
+                    np.max(test_residue[:, :, :, 5]) > 0
+                    and np.min(test_residue[:, :, :, 5]) < 0
+                ), "Frame 32 and 33 should have both positive and negative values as the residues are Lys and Asp"
 
 
 def test_make_frame_dataset_as_gaussian_cnocacbp():
@@ -483,11 +488,11 @@ def test_make_frame_dataset_as_gaussian_cnocacbp():
             # check that the frame for all the data frames match between the input
             # arrays and the ones that come out of the HDF5 data set
             residue_number = str(n)
-            residue_test = array_test[n - 1]
+            test_residue = array_test[n - 1]
             hdf5_array = dataset["1ubq"]["A"][residue_number][()]
             npt.assert_array_equal(
                 hdf5_array,
-                residue_test,
+                test_residue,
                 err_msg=(
                     "The frame in the HDF5 data set should be the same as the "
                     "input frame."
@@ -506,10 +511,15 @@ def test_make_frame_dataset_as_gaussian_cnocacbp():
             else:
                 polarity = 0
             if polarity == 1:
-                assert np.max(residue_test[:, :, :, 5]) > 0
+                assert np.max(test_residue[:, :, :, 5]) > 0
             if polarity == 0:
-                assert np.min(residue_test[:, :, :, 5]) < 0
+                assert np.min(test_residue[:, :, :, 5]) < 0
+            if residue_number in ("5", "6"):
+                assert (
+                    np.max(test_residue[:, :, :, 5]) > 0
+                    and np.min(test_residue[:, :, :, 5]) < 0
+                ), "Frame 5 and 6 should have both positive and negative values as the residues are Val and Lys"
 
 
 @settings(deadline=700)
 @given(integers(min_value=0, max_value=214))