diff --git a/.DS_Store b/.DS_Store index dbfc581..62343de 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/README.md b/README.md index 5b72e52..baaee51 100644 --- a/README.md +++ b/README.md @@ -58,19 +58,18 @@ cd ../ - prepare the data ```angular2 -cd pdbbind bash data_prepare_model.sh cd .. ``` - train model ```angular2 cd tensorflow/script -python run_cls.py --config configs/vgg_depth6.yaml +python run_cls.py --config configs/train_resnet_depth6.yaml ``` - test performance ```angular2 -python test_reg_ave.py --config configs/vgg_depth6_test.yaml +python test_reg_ave.py --config configs/test_resnet_depth6.yaml ``` ### Acknowledgments diff --git a/octree/.DS_Store b/octree/.DS_Store index 018aff8..c376d1a 100644 Binary files a/octree/.DS_Store and b/octree/.DS_Store differ diff --git a/pdbbind/.DS_Store b/pdbbind/.DS_Store new file mode 100644 index 0000000..159ff1c Binary files /dev/null and b/pdbbind/.DS_Store differ diff --git a/pdbbind/bash_scripts/.DS_Store b/pdbbind/bash_scripts/.DS_Store new file mode 100644 index 0000000..dc084d3 Binary files /dev/null and b/pdbbind/bash_scripts/.DS_Store differ diff --git a/pdbbind/bash_scripts/correct_index0.sh b/pdbbind/bash_scripts/correct_index0.sh new file mode 100644 index 0000000..b234a49 --- /dev/null +++ b/pdbbind/bash_scripts/correct_index0.sh @@ -0,0 +1,6 @@ +for D in ./*/; +do + cd "${D}"; + python ../../python/clean_index_error.py + cd ..; +done \ No newline at end of file diff --git a/pdbbind/bash_scripts/correct_mol2.sh b/pdbbind/bash_scripts/correct_mol2.sh new file mode 100644 index 0000000..5a1b490 --- /dev/null +++ b/pdbbind/bash_scripts/correct_mol2.sh @@ -0,0 +1,13 @@ +for D in '3qlb' '4oem' '4oel' '2fov' '2fou' '2foy' +do + cd "${D}"; + echo "${D}"; + FNAME=$(basename ${D}) + FPATH=$(dirname ${D}) + DNAME=$(basename ${FNAME}) + echo $DNAME + + mv ${DNAME}_ligand.mol2 ${DNAME}_ligand.mol2.record; + babel ${DNAME}_ligand.sdf -omol2 ${DNAME}_ligand.mol2; + cd .. 
+done \ No newline at end of file diff --git a/pdbbind/bash_scripts/data_prepare_points.sh b/pdbbind/bash_scripts/data_prepare_points.sh new file mode 100644 index 0000000..c7e4078 --- /dev/null +++ b/pdbbind/bash_scripts/data_prepare_points.sh @@ -0,0 +1,10 @@ +for D in ./*/; +do + echo "${D}"; + cd "${D}"; + java -cp ../../../java/cdk-2.3-SNAPSHOT.jar:../../../java Surface_for_single 3 ico pdbbind; + python ../../../python/write_complex.py + python ../../../python/atomic_feature.py + ../../../../octree/build/pdb_to_points --complex_id file_list.txt + cd ..; +done \ No newline at end of file diff --git a/pdbbind/bash_scripts/octree_for_single.sh b/pdbbind/bash_scripts/octree_for_single.sh new file mode 100644 index 0000000..c1df0e3 --- /dev/null +++ b/pdbbind/bash_scripts/octree_for_single.sh @@ -0,0 +1,12 @@ +# here 6 is a parameter to control the density of point clouds +# 6 is for depth 10, can try smaller numbers e.g. 3-5 for smaller depth +java -cp ../../../java/cdk-2.3-SNAPSHOT.jar:../../../java Surface_for_single 6 ico pdbbind + +python ../../../python/write_complex.py +python ../../../python/atomic_feature.py +../../../../octree/build/pdb_to_points --complex_id file_list.txt +mkdir octree_folder +../../../../octree/build/octree --depth 10 --filenames point_list.txt --output_path ./octree_folder +cd octree_folder +ls -lh + diff --git a/pdbbind/data_download.sh b/pdbbind/data_download.sh new file mode 100644 index 0000000..9789f32 --- /dev/null +++ b/pdbbind/data_download.sh @@ -0,0 +1,27 @@ +mkdir data_folder +cd data_folder + +wget "http://www.pdbbind.org.cn/download/pdbbind_v2018_refined.tar.gz" +wget "http://www.pdbbind.org.cn/download/pdbbind_v2018_other_PL.tar.gz" +wget "http://www.pdbbind-cn.org/download/CASF-2016.tar.gz" + +tar -xzvf pdbbind_v2018_refined.tar.gz +tar -xzvf pdbbind_v2018_other_PL.tar.gz +tar -xzvf CASF-2016.tar.gz +mv CASF-2016/coreset . 
+ +cp ../bash_scripts/* v2018-other-PL +cp ../bash_scripts/* refined-set +cp ../bash_scripts/* coreset + +cd v2018-other-PL +bash correct_mol2.sh +bash correct_index0.sh + +cd ../refined-set +bash correct_index0.sh + +cd ../coreset +bash correct_index0.sh + + diff --git a/pdbbind/data_example/.DS_Store b/pdbbind/data_example/.DS_Store new file mode 100644 index 0000000..e4db186 Binary files /dev/null and b/pdbbind/data_example/.DS_Store differ diff --git a/pdbbind/data_example/pdbbind/.DS_Store b/pdbbind/data_example/pdbbind/.DS_Store new file mode 100644 index 0000000..5a58224 Binary files /dev/null and b/pdbbind/data_example/pdbbind/.DS_Store differ diff --git a/pdbbind/data_example/pdbbind/1a1e/.DS_Store b/pdbbind/data_example/pdbbind/1a1e/.DS_Store new file mode 100644 index 0000000..d98b9ed Binary files /dev/null and b/pdbbind/data_example/pdbbind/1a1e/.DS_Store differ diff --git a/pdbbind/data_example/pdbbind/1a1e/1a1e_ligand.mol2 b/pdbbind/data_example/pdbbind/1a1e/1a1e_ligand.mol2 new file mode 100755 index 0000000..4b6f001 --- /dev/null +++ b/pdbbind/data_example/pdbbind/1a1e/1a1e_ligand.mol2 @@ -0,0 +1,163 @@ +### +### Created by X-TOOL on Mon Sep 10 21:12:46 2018 +### + +@MOLECULE +1a1e_ligand + 73 74 1 0 0 +SMALL +GAST_HUCK + + +@ATOM + 1 C 43.2930 -8.1280 37.5500 C.2 1 ACE 0.1753 + 2 O 44.0670 -7.1860 37.7880 O.2 1 ACE -0.3972 + 3 CH3 43.5190 -9.0100 36.3320 C.3 1 ACE 0.0258 + 4 N 42.1690 -8.3110 38.2080 N.am 1 PTR -0.2628 + 5 CA 41.8750 -7.5040 39.3540 C.3 1 PTR 0.1468 + 6 C 42.3970 -8.1890 40.5900 C.2 1 PTR 0.2074 + 7 O 42.8670 -9.3290 40.5910 O.2 1 PTR -0.3941 + 8 CB 40.3910 -7.2910 39.3740 C.3 1 PTR 0.0400 + 9 CG 40.0200 -6.4670 38.1650 C.ar 1 PTR -0.0004 + 10 CD1 39.4330 -7.0660 37.0710 C.ar 1 PTR -0.0239 + 11 CD2 40.2030 -5.0890 38.1630 C.ar 1 PTR -0.0239 + 12 CE1 39.0230 -6.2990 35.9910 C.ar 1 PTR 0.0115 + 13 CE2 39.7950 -4.3030 37.0800 C.ar 1 PTR 0.0115 + 14 CZ 39.1970 -4.9130 35.9920 C.ar 1 PTR 0.1728 + 15 OH 38.7140 -4.1030 34.8990 O.3 1 PTR 
-0.2040 + 16 P 39.5630 -3.9590 33.5410 P.3 1 PTR 0.1348 + 17 O1P 39.2960 -5.2230 32.8550 O.co2 1 PTR -0.6653 + 18 O2P 38.9860 -2.7290 32.9200 O.co2 1 PTR -0.6653 + 19 O3P 40.9750 -3.8330 34.0580 O.co2 1 PTR -0.6653 + 20 N 42.3460 -7.4520 41.6620 N.am 1 GLU -0.2634 + 21 CA 42.8210 -7.9260 42.9250 C.3 1 GLU 0.1325 + 22 C 41.6490 -8.4730 43.7280 C.2 1 GLU 0.2042 + 23 O 40.5250 -7.9950 43.5040 O.2 1 GLU -0.3943 + 24 CB 43.4630 -6.7590 43.6070 C.3 1 GLU -0.0008 + 25 CG 44.7130 -6.2540 42.9200 C.3 1 GLU 0.0044 + 26 CD 45.8500 -7.2030 43.1550 C.2 1 GLU 0.0350 + 27 OE1 46.3510 -7.2360 44.2790 O.co2 1 GLU -0.5690 + 28 OE2 46.2130 -7.9200 42.2380 O.co2 1 GLU -0.5690 + 29 N1 41.7710 -9.4590 44.6560 N.am 1 DIY -0.2711 + 30 C2 43.0100 -10.2080 44.8790 C.3 1 DIY 0.0340 + 31 C6 40.6720 -9.7740 45.5450 C.3 1 DIY 0.0368 + 32 C3 43.5030 -9.7480 46.2370 C.3 1 DIY -0.0322 + 33 C4 42.3970 -9.9440 47.2660 C.3 1 DIY -0.0465 + 34 C5 41.0210 -9.3410 46.9320 C.3 1 DIY -0.0205 + 35 C1' 40.9160 -7.8430 47.1120 C.3 1 DIY -0.0484 + 36 C2' 41.2000 -7.5140 48.5540 C.3 1 DIY -0.0530 + 37 C3' 41.5460 -6.0620 48.7310 C.3 1 DIY -0.0559 + 38 C4' 40.3430 -5.2830 48.3020 C.3 1 DIY -0.0653 + 39 H1 42.7378 -9.7833 36.2893 H 1 ACE 0.0467 + 40 H2 44.5062 -9.4898 36.4047 H 1 ACE 0.0467 + 41 H3 43.4764 -8.3951 35.4210 H 1 ACE 0.0467 + 42 H4 41.5214 -9.0118 37.9089 H 1 PTR 0.1885 + 43 H5 42.3738 -6.5282 39.2592 H 1 PTR 0.0840 + 44 H6 40.1031 -6.7573 40.2917 H 1 PTR 0.0584 + 45 H7 39.8744 -8.2614 39.3368 H 1 PTR 0.0584 + 46 H8 39.2914 -8.1406 37.0549 H 1 PTR 0.0703 + 47 H9 40.6711 -4.6140 39.0176 H 1 PTR 0.0703 + 48 H10 38.5622 -6.7805 35.1361 H 1 PTR 0.0736 + 49 H11 39.9456 -3.2296 37.0933 H 1 PTR 0.0736 + 50 H12 41.9644 -6.5297 41.6007 H 1 GLU 0.1883 + 51 H13 43.5632 -8.7244 42.7773 H 1 GLU 0.0801 + 52 H14 43.7293 -7.0606 44.6308 H 1 GLU 0.0330 + 53 H15 42.7334 -5.9367 43.6450 H 1 GLU 0.0330 + 54 H16 44.9755 -5.2649 43.3235 H 1 GLU 0.0433 + 55 H17 44.5259 -6.1712 41.8392 H 1 GLU 0.0433 + 56 H18 43.7496 
-9.9743 44.0990 H 1 DIY 0.0522 + 57 H19 42.8127 -11.2901 44.8848 H 1 DIY 0.0522 + 58 H20 40.4890 -10.8586 45.5335 H 1 DIY 0.0525 + 59 H21 39.7665 -9.2458 45.2118 H 1 DIY 0.0525 + 60 H22 43.7750 -8.6833 46.1881 H 1 DIY 0.0285 + 61 H23 44.3843 -10.3387 46.5276 H 1 DIY 0.0285 + 62 H24 42.7373 -9.4929 48.2098 H 1 DIY 0.0269 + 63 H25 42.2605 -11.0266 47.4052 H 1 DIY 0.0269 + 64 H26 40.2896 -9.7888 47.6209 H 1 DIY 0.0320 + 65 H27 39.9023 -7.5076 46.8476 H 1 DIY 0.0269 + 66 H28 41.6493 -7.3392 46.4651 H 1 DIY 0.0269 + 67 H29 42.0448 -8.1273 48.9007 H 1 DIY 0.0265 + 68 H30 40.3083 -7.7446 49.1554 H 1 DIY 0.0265 + 69 H31 42.4119 -5.7988 48.1057 H 1 DIY 0.0263 + 70 H32 41.7780 -5.8525 49.7856 H 1 DIY 0.0263 + 71 H33 40.5443 -4.2072 48.4124 H 1 DIY 0.0230 + 72 H34 39.4832 -5.5613 48.9290 H 1 DIY 0.0230 + 73 H35 40.1170 -5.5076 47.2492 H 1 DIY 0.0230 +@BOND + 1 1 2 2 + 2 1 3 1 + 3 5 4 1 + 4 5 8 1 + 5 6 5 1 + 6 6 7 2 + 7 8 9 1 + 8 9 11 ar + 9 9 10 ar + 10 10 12 ar + 11 12 14 ar + 12 14 15 1 + 13 13 14 ar + 14 11 13 ar + 15 15 16 1 + 16 16 19 ar + 17 16 18 ar + 18 16 17 ar + 19 22 21 1 + 20 21 24 1 + 21 21 20 1 + 22 24 25 1 + 23 25 26 1 + 24 26 27 ar + 25 26 28 ar + 26 22 23 2 + 27 29 31 1 + 28 29 30 1 + 29 30 32 1 + 30 32 33 1 + 31 34 33 1 + 32 34 35 1 + 33 31 34 1 + 34 35 36 1 + 35 36 37 1 + 36 37 38 1 + 37 4 1 am + 38 20 6 am + 39 22 29 am + 40 3 39 1 + 41 3 40 1 + 42 3 41 1 + 43 4 42 1 + 44 5 43 1 + 45 8 44 1 + 46 8 45 1 + 47 10 46 1 + 48 11 47 1 + 49 12 48 1 + 50 13 49 1 + 51 20 50 1 + 52 21 51 1 + 53 24 52 1 + 54 24 53 1 + 55 25 54 1 + 56 25 55 1 + 57 30 56 1 + 58 30 57 1 + 59 31 58 1 + 60 31 59 1 + 61 32 60 1 + 62 32 61 1 + 63 33 62 1 + 64 33 63 1 + 65 34 64 1 + 66 35 65 1 + 67 35 66 1 + 68 36 67 1 + 69 36 68 1 + 70 37 69 1 + 71 37 70 1 + 72 38 71 1 + 73 38 72 1 + 74 38 73 1 +@SUBSTRUCTURE + 1 ACE 1 + diff --git a/pdbbind/data_example/pdbbind/1a1e/1a1e_pocket.pdb b/pdbbind/data_example/pdbbind/1a1e/1a1e_pocket.pdb new file mode 100755 index 0000000..50bef71 --- /dev/null 
+++ b/pdbbind/data_example/pdbbind/1a1e/1a1e_pocket.pdb @@ -0,0 +1,397 @@ +HEADER 1A1E_POCKET +COMPND 1A1E_POCKET +REMARK GENERATED BY X-TOOL on Fri Nov 18 12:18:38 2016 +ATOM 1 N LYS B 155 36.647 6.515 30.602 1.00 35.68 N +ATOM 2 H LYS B 155 36.193 7.299 30.090 1.00 0.00 H +ATOM 3 CA LYS B 155 36.879 5.269 29.919 1.00 36.15 C +ATOM 4 C LYS B 155 38.252 5.067 29.287 1.00 36.67 C +ATOM 5 O LYS B 155 38.415 4.948 28.078 1.00 38.40 O +ATOM 6 CB LYS B 155 35.761 5.138 28.880 1.00 37.52 C +ATOM 7 N ILE B 156 39.317 5.070 30.074 1.00 39.79 N +ATOM 8 H ILE B 156 39.203 5.286 31.085 1.00 0.00 H +ATOM 9 CA ILE B 156 40.638 4.778 29.555 1.00 37.47 C +ATOM 10 C ILE B 156 40.792 3.256 29.689 1.00 43.37 C +ATOM 11 O ILE B 156 40.058 2.581 30.427 1.00 46.79 O +ATOM 12 CB ILE B 156 41.725 5.518 30.365 1.00 34.36 C +ATOM 13 CG1 ILE B 156 41.631 5.328 31.860 1.00 28.03 C +ATOM 14 CG2 ILE B 156 41.522 6.982 30.093 1.00 37.99 C +ATOM 15 CD1 ILE B 156 42.866 5.883 32.591 1.00 27.16 C +ATOM 16 N THR B 157 41.742 2.627 29.008 1.00 44.39 N +ATOM 17 H THR B 157 42.386 3.167 28.395 1.00 0.00 H +ATOM 18 CA THR B 157 41.883 1.195 29.116 1.00 40.18 C +ATOM 19 C THR B 157 42.819 0.846 30.258 1.00 38.29 C +ATOM 20 O THR B 157 43.540 1.700 30.777 1.00 39.02 O +ATOM 21 CB THR B 157 42.427 0.663 27.772 1.00 43.61 C +ATOM 22 OG1 THR B 157 43.810 0.934 27.784 1.00 46.49 O +ATOM 23 HG1 THR B 157 44.215 0.612 26.940 1.00 0.00 H +ATOM 24 CG2 THR B 157 41.710 1.252 26.550 1.00 42.07 C +ATOM 25 N ARG B 158 42.921 -0.422 30.600 1.00 34.16 N +ATOM 26 H ARG B 158 42.306 -1.119 30.134 1.00 0.00 H +ATOM 27 CA ARG B 158 43.846 -0.870 31.588 1.00 35.10 C +ATOM 28 C ARG B 158 45.281 -0.564 31.158 1.00 36.34 C +ATOM 29 O ARG B 158 46.114 -0.217 32.011 1.00 36.42 O +ATOM 30 CB ARG B 158 43.650 -2.385 31.808 1.00 38.19 C +ATOM 31 CG ARG B 158 44.723 -2.880 32.776 1.00 39.95 C +ATOM 32 CD ARG B 158 44.790 -4.362 33.002 1.00 49.29 C +ATOM 33 NE ARG B 158 43.629 -4.791 33.739 1.00 53.75 N +ATOM 34 HE ARG B 158 42.682 
-4.468 33.456 1.00 0.00 H +ATOM 35 CZ ARG B 158 43.773 -5.601 34.782 1.00 56.85 C +ATOM 36 NH1 ARG B 158 44.986 -6.056 35.163 1.00 54.83 N +ATOM 37 1HH1 ARG B 158 45.837 -5.773 34.636 1.00 0.00 H +ATOM 38 2HH1 ARG B 158 45.070 -6.689 35.984 1.00 0.00 H +ATOM 39 NH2 ARG B 158 42.661 -5.860 35.484 1.00 62.07 N +ATOM 40 1HH2 ARG B 158 41.756 -5.434 35.200 1.00 0.00 H +ATOM 41 2HH2 ARG B 158 42.703 -6.487 36.313 1.00 0.00 H +ATOM 42 N ARG B 159 45.615 -0.709 29.871 1.00 38.74 N +ATOM 43 H ARG B 159 44.905 -1.031 29.183 1.00 0.00 H +ATOM 44 CA ARG B 159 46.988 -0.413 29.429 1.00 43.71 C +ATOM 45 C ARG B 159 47.269 1.084 29.617 1.00 40.70 C +ATOM 46 O ARG B 159 48.265 1.420 30.250 1.00 39.24 O +ATOM 47 CB ARG B 159 47.275 -0.791 27.913 1.00 50.97 C +ATOM 48 CG ARG B 159 46.596 -0.017 26.744 1.00 62.79 C +ATOM 49 CD ARG B 159 47.087 -0.294 25.307 1.00 68.70 C +ATOM 50 NE ARG B 159 46.654 0.790 24.432 1.00 75.01 N +ATOM 51 HE ARG B 159 45.870 1.387 24.766 1.00 0.00 H +ATOM 52 CZ ARG B 159 47.183 1.085 23.226 1.00 76.39 C +ATOM 53 NH1 ARG B 159 48.200 0.373 22.698 1.00 76.05 N +ATOM 54 1HH1 ARG B 159 48.600 -0.431 23.223 1.00 0.00 H +ATOM 55 2HH1 ARG B 159 48.584 0.628 21.766 1.00 0.00 H +ATOM 56 NH2 ARG B 159 46.727 2.147 22.552 1.00 77.09 N +ATOM 57 1HH2 ARG B 159 45.972 2.736 22.958 1.00 0.00 H +ATOM 58 2HH2 ARG B 159 47.127 2.384 21.622 1.00 0.00 H +ATOM 59 N SER B 161 45.901 3.306 31.714 1.00 37.50 N +ATOM 60 H SER B 161 45.081 2.721 31.456 1.00 0.00 H +ATOM 61 CA SER B 161 46.133 3.598 33.133 1.00 31.39 C +ATOM 62 C SER B 161 47.424 3.059 33.678 1.00 29.44 C +ATOM 63 O SER B 161 47.975 3.663 34.588 1.00 30.58 O +ATOM 64 CB SER B 161 45.039 3.042 34.007 1.00 28.98 C +ATOM 65 OG SER B 161 44.926 1.632 34.060 1.00 33.63 O +ATOM 66 HG SER B 161 44.750 1.282 33.151 1.00 0.00 H +ATOM 67 N GLU B 162 47.926 1.930 33.178 1.00 30.60 N +ATOM 68 H GLU B 162 47.423 1.424 32.421 1.00 0.00 H +ATOM 69 CA GLU B 162 49.164 1.418 33.690 1.00 33.04 C +ATOM 70 C GLU B 162 50.245 2.271 
33.097 1.00 34.83 C +ATOM 71 O GLU B 162 51.246 2.495 33.761 1.00 38.48 O +ATOM 72 CB GLU B 162 49.360 -0.045 33.310 1.00 38.65 C +ATOM 73 CG GLU B 162 48.343 -0.934 34.039 1.00 46.27 C +ATOM 74 CD GLU B 162 48.597 -2.438 34.266 1.00 53.59 C +ATOM 75 OE1 GLU B 162 49.564 -2.819 34.915 1.00 54.57 O +ATOM 76 OE2 GLU B 162 47.723 -3.258 33.967 1.00 59.54 O +ATOM 77 N ARG B 178 38.608 4.716 36.782 1.00 26.33 N +ATOM 78 H ARG B 178 38.645 3.845 37.349 1.00 0.00 H +ATOM 79 CA ARG B 178 38.247 4.639 35.405 1.00 26.02 C +ATOM 80 C ARG B 178 37.160 3.608 35.314 1.00 30.73 C +ATOM 81 O ARG B 178 36.824 2.946 36.300 1.00 31.96 O +ATOM 82 CB ARG B 178 39.451 4.233 34.611 1.00 25.38 C +ATOM 83 CG ARG B 178 40.119 3.018 35.153 1.00 25.79 C +ATOM 84 CD ARG B 178 41.064 2.514 34.119 1.00 28.40 C +ATOM 85 NE ARG B 178 41.619 1.240 34.556 1.00 36.88 N +ATOM 86 HE ARG B 178 42.457 1.249 35.171 1.00 0.00 H +ATOM 87 CZ ARG B 178 41.095 0.047 34.199 1.00 34.29 C +ATOM 88 NH1 ARG B 178 40.018 -0.033 33.392 1.00 31.74 N +ATOM 89 1HH1 ARG B 178 39.573 0.835 33.031 1.00 0.00 H +ATOM 90 2HH1 ARG B 178 39.632 -0.963 33.131 1.00 0.00 H +ATOM 91 NH2 ARG B 178 41.684 -1.058 34.704 1.00 29.95 N +ATOM 92 1HH2 ARG B 178 42.504 -0.967 35.338 1.00 0.00 H +ATOM 93 2HH2 ARG B 178 41.318 -2.000 34.459 1.00 0.00 H +ATOM 94 N GLU B 179 36.568 3.465 34.153 1.00 35.92 N +ATOM 95 H GLU B 179 36.838 4.093 33.369 1.00 0.00 H +ATOM 96 CA GLU B 179 35.548 2.458 33.928 1.00 39.46 C +ATOM 97 C GLU B 179 36.161 1.111 33.619 1.00 38.78 C +ATOM 98 O GLU B 179 37.244 1.009 33.026 1.00 38.50 O +ATOM 99 CB GLU B 179 34.694 2.876 32.782 1.00 46.53 C +ATOM 100 CG GLU B 179 33.946 4.159 33.091 1.00 55.35 C +ATOM 101 CD GLU B 179 32.953 4.602 32.023 1.00 61.15 C +ATOM 102 OE1 GLU B 179 33.021 4.119 30.883 1.00 61.29 O +ATOM 103 OE2 GLU B 179 32.122 5.460 32.346 1.00 67.16 O +ATOM 104 N SER B 180 35.518 0.027 34.034 1.00 40.32 N +ATOM 105 H SER B 180 34.628 0.117 34.564 1.00 0.00 H +ATOM 106 CA SER B 180 36.063 -1.282 33.744 
1.00 42.14 C +ATOM 107 C SER B 180 35.781 -1.576 32.276 1.00 42.39 C +ATOM 108 O SER B 180 34.643 -1.408 31.833 1.00 42.61 O +ATOM 109 CB SER B 180 35.376 -2.314 34.610 1.00 41.75 C +ATOM 110 OG SER B 180 35.956 -3.614 34.488 1.00 41.60 O +ATOM 111 HG SER B 180 36.908 -3.576 34.759 1.00 0.00 H +ATOM 112 N GLU B 181 36.794 -1.970 31.510 1.00 44.67 N +ATOM 113 H GLU B 181 37.756 -1.976 31.906 1.00 0.00 H +ATOM 114 CA GLU B 181 36.591 -2.396 30.125 1.00 52.62 C +ATOM 115 C GLU B 181 35.694 -3.667 30.088 1.00 54.37 C +ATOM 116 O GLU B 181 34.819 -3.877 29.239 1.00 53.82 O +ATOM 117 CB GLU B 181 37.907 -2.778 29.451 1.00 56.71 C +ATOM 118 CG GLU B 181 39.177 -1.918 29.496 1.00 66.14 C +ATOM 119 CD GLU B 181 40.451 -2.685 29.095 1.00 70.50 C +ATOM 120 OE1 GLU B 181 40.434 -3.905 29.084 1.00 80.17 O +ATOM 121 OE2 GLU B 181 41.503 -2.096 28.850 1.00 70.38 O +ATOM 122 N THR B 182 36.056 -4.547 31.030 1.00 56.85 N +ATOM 123 H THR B 182 36.860 -4.261 31.624 1.00 0.00 H +ATOM 124 CA THR B 182 35.480 -5.850 31.339 1.00 57.28 C +ATOM 125 C THR B 182 34.082 -5.987 31.965 1.00 55.70 C +ATOM 126 O THR B 182 33.262 -6.787 31.510 1.00 58.18 O +ATOM 127 CB THR B 182 36.566 -6.525 32.208 1.00 57.69 C +ATOM 128 OG1 THR B 182 37.689 -6.586 31.342 1.00 55.43 O +ATOM 129 HG1 THR B 182 38.448 -7.010 31.817 1.00 0.00 H +ATOM 130 CG2 THR B 182 36.220 -7.901 32.779 1.00 65.73 C +ATOM 131 N THR B 183 33.715 -5.264 33.008 1.00 51.90 N +ATOM 132 H THR B 183 34.343 -4.526 33.386 1.00 0.00 H +ATOM 133 CA THR B 183 32.431 -5.513 33.615 1.00 47.23 C +ATOM 134 C THR B 183 31.663 -4.264 33.322 1.00 48.34 C +ATOM 135 O THR B 183 31.996 -3.163 33.767 1.00 47.75 O +ATOM 136 CB THR B 183 32.612 -5.723 35.106 1.00 48.79 C +ATOM 137 OG1 THR B 183 33.525 -6.827 35.272 1.00 47.20 O +ATOM 138 HG1 THR B 183 33.667 -6.994 36.237 1.00 0.00 H +ATOM 139 CG2 THR B 183 31.260 -5.836 35.795 1.00 41.24 C +ATOM 140 N ALA B 186 30.950 -1.375 35.933 1.00 43.93 N +ATOM 141 H ALA B 186 31.259 -1.786 35.029 1.00 0.00 H +ATOM 
142 CA ALA B 186 31.841 -1.390 37.071 1.00 40.75 C +ATOM 143 C ALA B 186 32.907 -0.372 36.814 1.00 37.28 C +ATOM 144 O ALA B 186 33.175 0.022 35.671 1.00 38.32 O +ATOM 145 CB ALA B 186 32.514 -2.774 37.269 1.00 34.85 C +ATOM 146 N TYR B 187 33.486 0.045 37.919 1.00 33.46 N +ATOM 147 H TYR B 187 33.141 -0.295 38.839 1.00 0.00 H +ATOM 148 CA TYR B 187 34.586 0.962 37.871 1.00 30.07 C +ATOM 149 C TYR B 187 35.799 0.220 38.322 1.00 29.21 C +ATOM 150 O TYR B 187 35.742 -0.917 38.773 1.00 26.71 O +ATOM 151 CB TYR B 187 34.341 2.128 38.778 1.00 32.19 C +ATOM 152 CG TYR B 187 33.184 2.909 38.234 1.00 32.48 C +ATOM 153 CD1 TYR B 187 33.422 3.826 37.212 1.00 35.84 C +ATOM 154 CD2 TYR B 187 31.909 2.693 38.734 1.00 35.48 C +ATOM 155 CE1 TYR B 187 32.370 4.540 36.669 1.00 36.66 C +ATOM 156 CE2 TYR B 187 30.853 3.407 38.195 1.00 38.36 C +ATOM 157 CZ TYR B 187 31.102 4.322 37.172 1.00 40.07 C +ATOM 158 OH TYR B 187 30.067 5.040 36.637 1.00 47.95 O +ATOM 159 HH TYR B 187 30.413 5.638 35.928 1.00 0.00 H +ATOM 160 N CYS B 188 36.915 0.876 38.190 1.00 29.82 N +ATOM 161 H CYS B 188 36.902 1.835 37.787 1.00 0.00 H +ATOM 162 CA CYS B 188 38.160 0.297 38.591 1.00 29.69 C +ATOM 163 C CYS B 188 38.954 1.415 39.281 1.00 27.86 C +ATOM 164 O CYS B 188 38.877 2.597 38.905 1.00 25.59 O +ATOM 165 CB CYS B 188 38.811 -0.248 37.310 1.00 26.27 C +ATOM 166 SG CYS B 188 40.444 -0.921 37.704 1.00 40.11 S +ATOM 167 N LEU B 189 39.675 1.053 40.334 1.00 24.60 N +ATOM 168 H LEU B 189 39.593 0.071 40.665 1.00 0.00 H +ATOM 169 CA LEU B 189 40.567 1.936 41.050 1.00 20.46 C +ATOM 170 C LEU B 189 41.964 1.369 40.790 1.00 24.44 C +ATOM 171 O LEU B 189 42.269 0.301 41.343 1.00 23.74 O +ATOM 172 CB LEU B 189 40.231 1.872 42.513 1.00 21.82 C +ATOM 173 CG LEU B 189 41.036 2.597 43.541 1.00 22.50 C +ATOM 174 CD1 LEU B 189 40.857 4.110 43.433 1.00 18.98 C +ATOM 175 CD2 LEU B 189 40.489 2.191 44.900 1.00 20.44 C +ATOM 176 N SER B 190 42.789 2.011 39.922 1.00 21.11 N +ATOM 177 H SER B 190 42.428 2.859 39.440 1.00 0.00 
H +ATOM 178 CA SER B 190 44.138 1.584 39.630 1.00 18.59 C +ATOM 179 C SER B 190 45.044 2.431 40.515 1.00 21.74 C +ATOM 180 O SER B 190 44.903 3.655 40.520 1.00 25.73 O +ATOM 181 CB SER B 190 44.360 1.856 38.224 1.00 16.99 C +ATOM 182 OG SER B 190 43.339 1.204 37.475 1.00 21.96 O +ATOM 183 HG SER B 190 43.379 0.229 37.643 1.00 0.00 H +ATOM 184 N VAL B 191 45.967 1.810 41.249 1.00 24.74 N +ATOM 185 H VAL B 191 46.095 0.793 41.073 1.00 0.00 H +ATOM 186 CA VAL B 191 46.821 2.410 42.279 1.00 23.94 C +ATOM 187 C VAL B 191 48.282 2.017 42.032 1.00 28.32 C +ATOM 188 O VAL B 191 48.490 0.819 41.816 1.00 26.51 O +ATOM 189 CB VAL B 191 46.415 1.873 43.667 1.00 22.27 C +ATOM 190 CG1 VAL B 191 47.116 2.699 44.709 1.00 19.72 C +ATOM 191 CG2 VAL B 191 44.921 1.902 43.863 1.00 17.32 C +ATOM 192 N ASP B 193 52.100 0.901 43.188 1.00 32.22 N +ATOM 193 H ASP B 193 52.442 0.516 42.284 1.00 0.00 H +ATOM 194 CA ASP B 193 52.624 0.376 44.417 1.00 36.91 C +ATOM 195 C ASP B 193 54.083 0.070 44.170 1.00 36.57 C +ATOM 196 O ASP B 193 54.546 0.151 43.028 1.00 36.03 O +ATOM 197 CB ASP B 193 51.848 -0.893 44.766 1.00 39.69 C +ATOM 198 CG ASP B 193 52.234 -1.483 46.113 1.00 43.46 C +ATOM 199 OD1 ASP B 193 52.443 -0.715 47.060 1.00 45.59 O +ATOM 200 OD2 ASP B 193 52.311 -2.709 46.205 1.00 40.83 O +ATOM 201 N ASN B 201 54.530 -1.257 40.276 1.00 44.89 N +ATOM 202 H ASN B 201 54.316 -0.687 41.119 1.00 0.00 H +ATOM 203 CA ASN B 201 53.471 -2.046 39.688 1.00 35.31 C +ATOM 204 C ASN B 201 52.201 -1.308 40.022 1.00 28.84 C +ATOM 205 O ASN B 201 52.138 -0.498 40.938 1.00 28.46 O +ATOM 206 CB ASN B 201 53.470 -3.474 40.283 1.00 35.44 C +ATOM 207 CG ASN B 201 53.355 -3.578 41.779 1.00 34.69 C +ATOM 208 OD1 ASN B 201 52.256 -3.795 42.283 1.00 36.43 O +ATOM 209 ND2 ASN B 201 54.430 -3.465 42.555 1.00 31.82 N +ATOM 210 1HD2 ASN B 201 54.339 -3.560 43.587 1.00 0.00 H +ATOM 211 2HD2 ASN B 201 55.361 -3.281 42.130 1.00 0.00 H +ATOM 212 N VAL B 202 51.241 -1.475 39.171 1.00 22.98 N +ATOM 213 H VAL B 202 51.410 
-2.055 38.324 1.00 0.00 H +ATOM 214 CA VAL B 202 49.949 -0.881 39.365 1.00 27.41 C +ATOM 215 C VAL B 202 48.990 -2.001 39.809 1.00 25.27 C +ATOM 216 O VAL B 202 49.088 -3.090 39.274 1.00 29.30 O +ATOM 217 CB VAL B 202 49.568 -0.240 37.991 1.00 26.20 C +ATOM 218 CG1 VAL B 202 48.141 0.237 38.026 1.00 21.51 C +ATOM 219 CG2 VAL B 202 50.551 0.874 37.640 1.00 24.12 C +ATOM 220 N LYS B 203 48.062 -1.873 40.738 1.00 27.36 N +ATOM 221 H LYS B 203 48.022 -0.974 41.260 1.00 0.00 H +ATOM 222 CA LYS B 203 47.085 -2.906 41.087 1.00 26.09 C +ATOM 223 C LYS B 203 45.743 -2.347 40.639 1.00 27.24 C +ATOM 224 O LYS B 203 45.574 -1.117 40.577 1.00 31.40 O +ATOM 225 CB LYS B 203 47.094 -3.113 42.558 1.00 25.93 C +ATOM 226 CG LYS B 203 48.485 -3.555 42.920 1.00 27.51 C +ATOM 227 CD LYS B 203 48.438 -4.073 44.305 1.00 36.45 C +ATOM 228 CE LYS B 203 49.677 -4.909 44.559 1.00 40.35 C +ATOM 229 NZ LYS B 203 50.881 -4.093 44.534 1.00 50.52 N +ATOM 230 HZ1 LYS B 203 50.972 -3.641 43.602 1.00 0.00 H +ATOM 231 HZ2 LYS B 203 50.818 -3.362 45.271 1.00 0.00 H +ATOM 232 HZ3 LYS B 203 51.710 -4.695 44.711 1.00 0.00 H +ATOM 233 N HIS B 204 44.787 -3.173 40.234 1.00 27.53 N +ATOM 234 H HIS B 204 44.979 -4.194 40.278 1.00 0.00 H +ATOM 235 CA HIS B 204 43.494 -2.750 39.735 1.00 27.05 C +ATOM 236 C HIS B 204 42.466 -3.381 40.658 1.00 29.08 C +ATOM 237 O HIS B 204 42.502 -4.595 40.859 1.00 31.79 O +ATOM 238 CB HIS B 204 43.295 -3.245 38.326 1.00 26.61 C +ATOM 239 CG HIS B 204 44.298 -2.581 37.400 1.00 32.81 C +ATOM 240 ND1 HIS B 204 44.260 -1.377 36.811 1.00 34.84 N +ATOM 241 CD2 HIS B 204 45.479 -3.181 37.032 1.00 30.16 C +ATOM 242 CE1 HIS B 204 45.366 -1.254 36.105 1.00 30.74 C +ATOM 243 NE2 HIS B 204 46.085 -2.339 36.252 1.00 28.99 N +ATOM 244 N TYR B 205 41.608 -2.586 41.307 1.00 27.10 N +ATOM 245 H TYR B 205 41.707 -1.561 41.164 1.00 0.00 H +ATOM 246 CA TYR B 205 40.546 -3.036 42.197 1.00 23.95 C +ATOM 247 C TYR B 205 39.205 -2.795 41.513 1.00 23.24 C +ATOM 248 O TYR B 205 38.972 -1.648 41.143 1.00 
27.15 O +ATOM 249 CB TYR B 205 40.679 -2.252 43.483 1.00 19.35 C +ATOM 250 CG TYR B 205 41.994 -2.596 44.185 1.00 25.57 C +ATOM 251 CD1 TYR B 205 42.095 -3.696 45.063 1.00 22.15 C +ATOM 252 CD2 TYR B 205 43.095 -1.766 43.975 1.00 23.68 C +ATOM 253 CE1 TYR B 205 43.289 -3.953 45.732 1.00 22.26 C +ATOM 254 CE2 TYR B 205 44.295 -2.025 44.651 1.00 21.52 C +ATOM 255 CZ TYR B 205 44.391 -3.107 45.527 1.00 23.16 C +ATOM 256 OH TYR B 205 45.573 -3.282 46.237 1.00 23.84 O +ATOM 257 HH TYR B 205 45.495 -4.083 46.813 1.00 0.00 H +ATOM 258 N LYS B 206 38.324 -3.763 41.252 1.00 20.58 N +ATOM 259 H LYS B 206 38.549 -4.736 41.542 1.00 0.00 H +ATOM 260 CA LYS B 206 37.074 -3.510 40.584 1.00 18.92 C +ATOM 261 C LYS B 206 36.163 -2.807 41.594 1.00 22.87 C +ATOM 262 O LYS B 206 36.281 -3.036 42.808 1.00 19.82 O +ATOM 263 CB LYS B 206 36.530 -4.854 40.117 1.00 18.87 C +ATOM 264 CG LYS B 206 35.343 -4.649 39.219 1.00 23.91 C +ATOM 265 CD LYS B 206 34.904 -5.927 38.526 1.00 34.16 C +ATOM 266 CE LYS B 206 35.912 -6.521 37.563 1.00 34.73 C +ATOM 267 NZ LYS B 206 35.473 -7.788 36.999 1.00 41.96 N +ATOM 268 HZ1 LYS B 206 35.323 -8.474 37.766 1.00 0.00 H +ATOM 269 HZ2 LYS B 206 34.583 -7.645 36.480 1.00 0.00 H +ATOM 270 HZ3 LYS B 206 36.201 -8.147 36.349 1.00 0.00 H +ATOM 271 N ILE B 207 35.235 -1.935 41.177 1.00 22.25 N +ATOM 272 H ILE B 207 35.134 -1.753 40.158 1.00 0.00 H +ATOM 273 CA ILE B 207 34.360 -1.231 42.116 1.00 24.23 C +ATOM 274 C ILE B 207 33.000 -1.575 41.528 1.00 24.91 C +ATOM 275 O ILE B 207 32.768 -1.268 40.367 1.00 26.71 O +ATOM 276 CB ILE B 207 34.596 0.352 42.116 1.00 22.09 C +ATOM 277 CG1 ILE B 207 36.008 0.799 42.572 1.00 19.37 C +ATOM 278 CG2 ILE B 207 33.573 0.937 43.073 1.00 18.47 C +ATOM 279 CD1 ILE B 207 36.333 2.276 42.264 1.00 14.75 C +ATOM 280 N ARG B 208 32.114 -2.249 42.263 1.00 28.87 N +ATOM 281 H ARG B 208 32.379 -2.508 43.235 1.00 0.00 H +ATOM 282 CA ARG B 208 30.792 -2.645 41.781 1.00 27.49 C +ATOM 283 C ARG B 208 29.721 -1.755 42.344 1.00 30.21 C +ATOM 
284 O ARG B 208 29.754 -1.256 43.472 1.00 28.93 O +ATOM 285 CB ARG B 208 30.475 -4.074 42.160 1.00 30.70 C +ATOM 286 CG ARG B 208 31.434 -4.961 41.427 1.00 27.54 C +ATOM 287 CD ARG B 208 30.880 -6.296 41.100 1.00 27.89 C +ATOM 288 NE ARG B 208 31.995 -7.149 40.715 1.00 33.33 N +ATOM 289 HE ARG B 208 32.830 -7.176 41.335 1.00 0.00 H +ATOM 290 CZ ARG B 208 32.005 -7.907 39.598 1.00 33.77 C +ATOM 291 NH1 ARG B 208 30.959 -7.908 38.775 1.00 33.22 N +ATOM 292 1HH1 ARG B 208 30.127 -7.322 38.991 1.00 0.00 H +ATOM 293 2HH1 ARG B 208 30.974 -8.494 37.916 1.00 0.00 H +ATOM 294 NH2 ARG B 208 33.031 -8.745 39.332 1.00 30.77 N +ATOM 295 1HH2 ARG B 208 33.831 -8.814 39.993 1.00 0.00 H +ATOM 296 2HH2 ARG B 208 33.021 -9.321 38.466 1.00 0.00 H +ATOM 297 N TYR B 216 31.181 -0.692 46.167 1.00 18.50 N +ATOM 298 H TYR B 216 31.191 -0.973 45.166 1.00 0.00 H +ATOM 299 CA TYR B 216 31.989 -1.429 47.135 1.00 20.58 C +ATOM 300 C TYR B 216 33.172 -2.079 46.409 1.00 21.78 C +ATOM 301 O TYR B 216 33.147 -2.340 45.190 1.00 18.01 O +ATOM 302 CB TYR B 216 31.114 -2.523 47.859 1.00 14.78 C +ATOM 303 CG TYR B 216 30.711 -3.702 46.958 1.00 22.86 C +ATOM 304 CD1 TYR B 216 31.589 -4.788 46.751 1.00 20.47 C +ATOM 305 CD2 TYR B 216 29.481 -3.686 46.304 1.00 23.18 C +ATOM 306 CE1 TYR B 216 31.225 -5.841 45.905 1.00 19.10 C +ATOM 307 CE2 TYR B 216 29.123 -4.738 45.451 1.00 23.26 C +ATOM 308 CZ TYR B 216 29.997 -5.803 45.250 1.00 21.33 C +ATOM 309 OH TYR B 216 29.660 -6.803 44.357 1.00 25.34 O +ATOM 310 HH TYR B 216 29.541 -6.412 43.455 1.00 0.00 H +ATOM 311 N ILE B 217 34.247 -2.180 47.180 1.00 23.05 N +ATOM 312 H ILE B 217 34.252 -1.694 48.099 1.00 0.00 H +ATOM 313 CA ILE B 217 35.410 -2.940 46.790 1.00 23.46 C +ATOM 314 C ILE B 217 35.340 -4.176 47.728 1.00 25.51 C +ATOM 315 O ILE B 217 35.601 -5.297 47.290 1.00 25.99 O +ATOM 316 CB ILE B 217 36.734 -2.095 47.015 1.00 24.56 C +ATOM 317 CG1 ILE B 217 36.863 -1.021 45.935 1.00 18.29 C +ATOM 318 CG2 ILE B 217 37.978 -3.001 46.931 1.00 21.91 C +ATOM 319 
CD1 ILE B 217 38.104 -0.111 46.103 1.00 11.16 C +ATOM 320 N THR B 218 34.911 -4.131 49.006 1.00 25.10 N +ATOM 321 H THR B 218 34.571 -3.219 49.372 1.00 0.00 H +ATOM 322 CA THR B 218 34.892 -5.276 49.908 1.00 23.44 C +ATOM 323 C THR B 218 33.601 -5.312 50.698 1.00 22.58 C +ATOM 324 O THR B 218 32.794 -4.393 50.575 1.00 23.25 O +ATOM 325 CB THR B 218 36.095 -5.266 50.913 1.00 21.01 C +ATOM 326 OG1 THR B 218 35.820 -4.312 51.919 1.00 18.43 O +ATOM 327 HG1 THR B 218 36.570 -4.290 52.565 1.00 0.00 H +ATOM 328 CG2 THR B 218 37.420 -4.995 50.228 1.00 21.32 C +ATOM 329 N SER B 219 33.371 -6.315 51.544 1.00 22.05 N +ATOM 330 H SER B 219 34.114 -7.026 51.701 1.00 0.00 H +ATOM 331 CA SER B 219 32.099 -6.443 52.259 1.00 19.01 C +ATOM 332 C SER B 219 32.050 -5.604 53.522 1.00 19.03 C +ATOM 333 O SER B 219 31.024 -5.390 54.178 1.00 22.90 O +ATOM 334 CB SER B 219 31.918 -7.936 52.570 1.00 16.28 C +ATOM 335 OG SER B 219 33.115 -8.405 53.201 1.00 21.73 O +ATOM 336 HG SER B 219 33.020 -9.368 53.411 1.00 0.00 H +ATOM 337 N TYR B 233 39.179 5.655 51.302 1.00 21.38 N +ATOM 338 H TYR B 233 38.518 5.784 50.509 1.00 0.00 H +ATOM 339 CA TYR B 233 40.430 4.943 51.061 1.00 21.22 C +ATOM 340 C TYR B 233 41.645 5.795 51.030 1.00 24.34 C +ATOM 341 O TYR B 233 42.737 5.219 51.068 1.00 23.73 O +ATOM 342 CB TYR B 233 40.393 4.124 49.755 1.00 20.13 C +ATOM 343 CG TYR B 233 39.543 2.874 50.033 1.00 21.52 C +ATOM 344 CD1 TYR B 233 40.066 1.798 50.793 1.00 26.61 C +ATOM 345 CD2 TYR B 233 38.239 2.802 49.564 1.00 20.40 C +ATOM 346 CE1 TYR B 233 39.288 0.667 51.078 1.00 20.44 C +ATOM 347 CE2 TYR B 233 37.458 1.681 49.836 1.00 23.51 C +ATOM 348 CZ TYR B 233 37.985 0.616 50.595 1.00 24.32 C +ATOM 349 OH TYR B 233 37.179 -0.470 50.881 1.00 22.93 O +ATOM 350 HH TYR B 233 37.690 -1.127 51.417 1.00 0.00 H +ATOM 351 N ALA B 237 43.577 2.341 53.973 1.00 27.35 N +ATOM 352 H ALA B 237 44.336 2.458 54.674 1.00 0.00 H +ATOM 353 CA ALA B 237 43.769 1.418 52.857 1.00 24.20 C +ATOM 354 C ALA B 237 42.832 0.222 52.889 
1.00 24.50 C +ATOM 355 O ALA B 237 42.461 -0.271 51.827 1.00 28.18 O +ATOM 356 CB ALA B 237 45.175 0.852 52.836 1.00 21.89 C +ATOM 357 N ASP B 238 42.488 -0.269 54.099 1.00 28.69 N +ATOM 358 H ASP B 238 42.905 0.196 54.931 1.00 0.00 H +ATOM 359 CA ASP B 238 41.583 -1.395 54.354 1.00 26.94 C +ATOM 360 C ASP B 238 41.610 -2.519 53.365 1.00 24.45 C +ATOM 361 O ASP B 238 40.582 -2.973 52.880 1.00 22.95 O +ATOM 362 CB ASP B 238 40.164 -0.930 54.406 1.00 34.00 C +ATOM 363 CG ASP B 238 39.753 -0.316 55.696 1.00 44.27 C +ATOM 364 OD1 ASP B 238 40.586 0.230 56.444 1.00 53.16 O +ATOM 365 OD2 ASP B 238 38.555 -0.355 55.962 1.00 46.80 O +ATOM 366 N GLY B 239 42.808 -2.936 53.003 1.00 23.17 N +ATOM 367 H GLY B 239 43.661 -2.476 53.379 1.00 0.00 H +ATOM 368 CA GLY B 239 42.919 -4.032 52.085 1.00 23.57 C +ATOM 369 C GLY B 239 43.666 -3.655 50.842 1.00 29.48 C +ATOM 370 O GLY B 239 44.189 -4.532 50.158 1.00 34.77 O +ATOM 371 N LEU B 240 43.657 -2.393 50.436 1.00 32.04 N +ATOM 372 H LEU B 240 43.115 -1.685 50.972 1.00 0.00 H +ATOM 373 CA LEU B 240 44.397 -1.984 49.247 1.00 30.72 C +ATOM 374 C LEU B 240 45.903 -2.025 49.563 1.00 30.49 C +ATOM 375 O LEU B 240 46.329 -1.959 50.733 1.00 29.77 O +ATOM 376 CB LEU B 240 44.026 -0.563 48.832 1.00 24.19 C +ATOM 377 CG LEU B 240 42.624 -0.131 48.615 1.00 25.24 C +ATOM 378 CD1 LEU B 240 42.680 1.316 48.184 1.00 24.59 C +ATOM 379 CD2 LEU B 240 41.952 -0.945 47.556 1.00 25.68 C +ATOM 380 N CYS B 241 46.720 -2.085 48.505 1.00 32.52 N +ATOM 381 H CYS B 241 46.316 -2.122 47.547 1.00 0.00 H +ATOM 382 CA CYS B 241 48.159 -2.099 48.673 1.00 30.55 C +ATOM 383 C CYS B 241 48.668 -0.859 49.386 1.00 31.60 C +ATOM 384 O CYS B 241 49.626 -0.929 50.147 1.00 36.57 O +ATOM 385 CB CYS B 241 48.772 -2.218 47.321 1.00 27.85 C +ATOM 386 SG CYS B 241 48.292 -0.852 46.229 1.00 35.25 S +HETATM 387 O HOH 27 34.420 -1.246 49.745 1.00 16.40 O +HETATM 388 O HOH 28 45.725 -5.887 47.041 1.00 44.03 O +HETATM 389 O HOH 31 36.799 -9.190 38.730 1.00 35.38 O +HETATM 390 O HOH 
32 37.938 -2.783 52.276 1.00 31.50 O +HETATM 391 O HOH 40 36.631 -1.075 54.001 1.00 39.98 O +HETATM 392 O HOH 43 38.840 -6.219 42.707 1.00 29.27 O +HETATM 393 O HOH 44 45.673 -5.722 39.277 1.00 39.11 O +END diff --git a/pdbbind/data_prepare_model.sh b/pdbbind/data_prepare_model.sh new file mode 100644 index 0000000..8cfcf09 --- /dev/null +++ b/pdbbind/data_prepare_model.sh @@ -0,0 +1,22 @@ +# prepare points file +cd data_folder/v2018-other-PL +bash data_prepare_points.sh + +cd ../refined-set +bash data_prepare_points.sh + +cd ../coreset +bash data_prepare_points.sh + +cd .. +mkdir tfrecords + +# separate the data into train/test/val +cd ../../python +python generate_pdb_list.py + +# generate the tfrecords for tf model. +cd ../../../tensorflow/util +python convert_tfrecords.py --file_dir ../../pdbbind/data_folder --list_file ../../pdbbind/data_folder/points_list_test_reg.txt --records_name ../../pdbbind/data_folder/tfrecords/test_reg_points_den3.tfrecords --label_type float +python convert_tfrecords.py --file_dir ../../pdbbind/data_folder --list_file ../../pdbbind/data_folder/points_list_val_reg.txt --records_name ../../pdbbind/data_folder/tfrecords/val_reg_points_den3.tfrecords --label_type float +python convert_tfrecords.py --file_dir ../../pdbbind/data_folder --list_file ../../pdbbind/data_folder/points_list_train_reg.txt --records_name ../../pdbbind/data_folder/tfrecords/train_reg_points_den3.tfrecords --label_type float diff --git a/pdbbind/java/.DS_Store b/pdbbind/java/.DS_Store new file mode 100644 index 0000000..98a0a47 Binary files /dev/null and b/pdbbind/java/.DS_Store differ diff --git a/pdbbind/java/Surface_for_single.java b/pdbbind/java/Surface_for_single.java new file mode 100644 index 0000000..c3906dc --- /dev/null +++ b/pdbbind/java/Surface_for_single.java @@ -0,0 +1,302 @@ +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; 
+import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.nio.file.Path; +import java.nio.file.Paths; + + +import javax.vecmath.Point3d; + +import org.openscience.cdk.ChemFile; +import org.openscience.cdk.geometry.surface.AdaptiveNumericalSurface; +import org.openscience.cdk.geometry.surface.NumericalSurface; +import org.openscience.cdk.geometry.surface.Point_Type; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IChemObjectBuilder; +import org.openscience.cdk.io.Mol2Reader; +import org.openscience.cdk.io.PDBReader; +import org.openscience.cdk.io.iterator.IteratingSDFReader; +import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.io.MDLV2000Reader; +import org.openscience.cdk.tools.manipulator.ChemFileManipulator; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import javax.vecmath.Point3d; + +import org.openscience.cdk.ChemFile; +import org.openscience.cdk.geometry.surface.AdaptiveNumericalSurface; +import org.openscience.cdk.geometry.surface.NumericalSurface; +import org.openscience.cdk.geometry.surface.Point_Type; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IChemObjectBuilder; +import org.openscience.cdk.io.Mol2Reader; +import org.openscience.cdk.io.PDBReader; +import org.openscience.cdk.io.iterator.IteratingSDFReader; +import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.io.MDLV2000Reader; +import org.openscience.cdk.tools.manipulator.ChemFileManipulator; + +public class Surface_for_single { + //public javax.vecmath.Point3d[] getAllSurfacePoints(); + + public static String get_id(String filename) { + String[] parts = filename.split("."); + String part1 = parts[0]; 
+ String[] parts1 = part1.split("_"); + String id = parts1[0]; + return id; + } + + public static void handle_single_mol2(String filename, String path, String id, int tess_level, String tess_type, String file_type, boolean append) { + + try { + File file = new File(path + "/" + id + "/" + filename); + Mol2Reader reader = new Mol2Reader(new FileInputStream(file)); + ChemFile crambin = reader.read(new ChemFile()); + // ChemFile crambin = reader.read(new ChemFile()); generate warning: org.openscience.cdk.config.atomtypes.AtomTypeHandle WARN: Unrecognized hybridization in config file: tetrahedral + // refer to https://github.com/johnmay/cdk/blob/master/base/core/src/main/java/org/openscience/cdk/config/atomtypes/AtomTypeHandler.java + // Another warning: org.openscience.cdk.io.Mol2Reader WARN: Not reading molecule qualifiers + // refer to https://github.com/cdk/cdk/blob/master/storage/io/src/main/java/org/openscience/cdk/io/Mol2Reader.java + List containers= ChemFileManipulator.getAllAtomContainers(crambin); + String write_filename = path + "/" + id + "/" + id + "_cdk_"+ file_type + ".xyz"; + String atom_filename = path + "/" + id + "/" + id + "_cdk_"+ file_type + ".txt"; + //FileOutputStream outputStream = new FileOutputStream(write_filename); + BufferedWriter writer = new BufferedWriter(new FileWriter(write_filename, append)); + BufferedWriter writer1 = new BufferedWriter(new FileWriter(atom_filename, append)); + System.out.println("container size: " + containers.size()); + for (int i = 0; i< containers.size(); i++) { + IAtomContainer container = containers.get(i); +// String description = container.toString(); + +// AdaptiveNumericalSurface new_surface = new AdaptiveNumericalSurface(container, 0, tess_level, tess_type); + NumericalSurface new_surface = new NumericalSurface(container, 0, tess_level, tess_type); +// System.out.println(container); +// System.out.println(description); +// System.out.println(new_surface); + try { + //new_surface.calculateSurface(); + 
//Point3d[] points= new_surface.getAllSurfacePoints(); + ArrayList point_types = new_surface.getAllPointswithAtomType(); + +// System.out.println(points); + + System.out.println(point_types.size()); + for (int j = 0; j < point_types.size(); j++) { + org.openscience.cdk.geometry.surface.Point_Type point_type = point_types.get(j); + Point3d coord = (point_type).getCoord(); + int atom = ((Point_Type) point_type).getAtom(); + int atom_index = ((Point_Type) point_type).getIndex(); + String str = coord.x + " " + coord.y + " " + coord.z + "\n"; +// if (j == 1){System.out.println(str);} + writer.write(str); +// writer1.write(atom + "\n"); + writer1.write(atom + " " + atom_index + "\n"); + }} catch (Exception ex) {System.out.println("remind QQ Error: " + ex);} + } + /*IChemObject pdb_mol = reader.read();*/ + reader.close(); + + writer.close(); + writer1.close(); + System.out.println("Finished " + write_filename); + } catch (Exception ex) {System.out.println(ex);} + //return container; + } + + public static void handle_single_pdb(String filename, String path, String id, int tess_level, String tess_type, String file_type) { + //String id = get_id(filename); +// System.out.println(filename); +// String[] parts = filename.split("_"); + //System.out.println(parts); +// String id = parts[0]; +// System.out.println(id); + + + try { + File file = new File(path + "/" + id + "/" + filename); + PDBReader reader = new PDBReader(new FileInputStream(file)); + //IAtomContainer container = reader.read(SilentChemObjectBuilder.getInstance().newInstance(IAtomContainer.class)); + ChemFile crambin = reader.read(new ChemFile()); + List containers= ChemFileManipulator.getAllAtomContainers(crambin); + String write_filename = path + "/" + id + "/" + id + "_cdk_"+ file_type + ".xyz"; + String atom_filename = path + "/" + id + "/" + id + "_cdk_"+ file_type + ".txt"; + //FileOutputStream outputStream = new FileOutputStream(write_filename); + BufferedWriter writer = new BufferedWriter(new 
FileWriter(write_filename)); + BufferedWriter writer1 = new BufferedWriter(new FileWriter(atom_filename)); + for (int i = 0; i< containers.size(); i++) { + IAtomContainer container = containers.get(i); +// String description = container.toString(); + System.out.println(containers.size()); + +// AdaptiveNumericalSurface new_surface = new AdaptiveNumericalSurface(container, 0, tess_level, tess_type); + NumericalSurface new_surface = new NumericalSurface(container, 0, tess_level, tess_type); +// System.out.println(container); +// System.out.println(description); +// System.out.println(new_surface); + try { + ArrayList point_types = new_surface.getAllPointswithAtomType(); +// System.out.println(points); + System.out.println(point_types.size()); + for (int j = 0; j < point_types.size(); j++) { + org.openscience.cdk.geometry.surface.Point_Type point_type = point_types.get(j); + Point3d coord = (point_type).getCoord(); + int atom = ((Point_Type) point_type).getAtom(); + int atom_index = ((Point_Type) point_type).getIndex(); + String str = coord.x + " " + coord.y + " " + coord.z + "\n"; +// if (j == 1){System.out.println(str);} + writer.write(str); +// writer1.write(atom + "\n"); + writer1.write(atom + " " + atom_index + "\n"); + }} catch (Exception ex) {System.out.println(ex);} + } + /*IChemObject pdb_mol = reader.read();*/ + reader.close(); + + writer.close(); + writer1.close(); + System.out.println("Finished " + write_filename); + } catch (Exception ex) {System.out.println(ex);} + //return container; + } + + public static void handle_single_complex(String filename, String path, String id, int tess_level, String tess_type, String file_type) { + // need revise: to separate ligand/protein. 
+ try { + File file = new File(path + "/" + id + "/" + filename); + PDBReader reader = new PDBReader(new FileInputStream(file)); + ChemFile crambin = reader.read(new ChemFile()); + List containers= ChemFileManipulator.getAllAtomContainers(crambin); + String write_filename = path + "/" + id + "/" + id + "_cdk_"+ file_type + ".xyz"; + String atom_filename = path + "/" + id + "/" + id + "_cdk_"+ file_type + ".txt"; + BufferedWriter writer = new BufferedWriter(new FileWriter(write_filename)); + BufferedWriter writer1 = new BufferedWriter(new FileWriter(atom_filename)); + for (int i = 0; i< containers.size(); i++) { + IAtomContainer container = containers.get(i); + AdaptiveNumericalSurface new_surface = new AdaptiveNumericalSurface(container, 0, tess_level, tess_type); + try { + ArrayList point_types = new_surface.getAllPointswithAtomType(); + System.out.println(point_types.size()); + for (int j = 0; j < point_types.size(); j++) { + org.openscience.cdk.geometry.surface.Point_Type point_type = point_types.get(j); + Point3d coord = (point_type).getCoord(); + int atom = ((Point_Type) point_type).getAtom(); + int atom_index = ((Point_Type) point_type).getIndex(); + String str = coord.x + " " + coord.y + " " + coord.z + "\n"; + writer.write(str); +// writer1.write(atom + "\n"); + writer1.write(atom + " " + atom_index + "\n"); + }} catch (Exception ex) {System.out.println(ex);} + } + reader.close(); + + writer.close(); + writer1.close(); + System.out.println("Finished " + write_filename); + } catch (Exception ex) {System.out.println(ex);} + } + + public static void handle_single_id(File complex_id, String path, int tess_level, String tess_type, Boolean protein_flag, String source) { + //File folder = new File("your/path"); + String id_string = complex_id.getName(); + File[] listOfFiles = complex_id.listFiles(); + + if (source.equals("pdbbind")) { + boolean append = false; + for (int i = 0; i < listOfFiles.length; i++) { + String file_name = listOfFiles[i].getName(); + + if 
(file_name.endsWith("ligand.mol2")){ + System.out.println("Start work on: " + file_name); + handle_single_mol2(file_name, path, id_string, tess_level, tess_type, "ligand", append); + System.out.println("Finished: " + file_name); + append = false; + } + else if(file_name.endsWith("pocket.pdb")) { + System.out.println("Start work on: " + file_name); + handle_single_pdb(file_name, path, id_string, tess_level, tess_type, "pocket"); + System.out.println("Finished: " + file_name); + } + else if(protein_flag && file_name.endsWith("protein.pdb")) { + System.out.println("Start work on: " + file_name); + handle_single_pdb(file_name, path, id_string, tess_level-2, tess_type, "protein"); + System.out.println("Finished: " + file_name); + } + } // end for + + } // end if source + else if (source.equals("pdbbank")) { + boolean append = false; + for (int i = 0; i < listOfFiles.length; i++) { + String file_name = listOfFiles[i].getName(); + if(file_name.endsWith("_withHs.pdb")) { + handle_single_complex(file_name, path, id_string, tess_level-2, tess_type, "protein"); + } + }// end for + } + else if (source.equals("astex")) { + boolean append = false; + for (int i = 0; i < listOfFiles.length; i++) { + String file_name = listOfFiles[i].getName(); + + if (file_name.endsWith("ligand.mol2")){ + System.out.println("Start work on: " + file_name); + handle_single_mol2(file_name, path, id_string, tess_level, tess_type, "ligand", append); + System.out.println("Finished: " + file_name); + append = false; + } + else if(file_name.endsWith("protein.mol2")) { + System.out.println("Start work on: " + file_name); + handle_single_mol2(file_name, path, id_string, tess_level, tess_type, "pocket", append); + System.out.println("Finished: " + file_name); + } + } // end for + + } + + else { + System.out.println("This File Source is not supported: " + source); + } + + } + public static void main(String[] args) { + + int tess_level = Integer.parseInt(args[0]); + String tess_type = args[1]; + String source 
= args[2]; + + Path currentRelativePath = Paths.get(""); + String s = currentRelativePath.toAbsolutePath().toString(); + String[] array = s.split("/"); + +// String complex_id = array[array.length-1]; + array = Arrays.copyOf(array, array.length - 1); + String dataset_path = String.join("/", array); + + File complex_file = new File(s); + + if (complex_file.isDirectory()) { + String id_string = complex_file.getName(); + handle_single_id(complex_file, dataset_path, tess_level, tess_type, false, source); + } + else { + System.out.println(s + "is not a folder"); + } + + } +} + diff --git a/pdbbind/java/cdk-2.3-SNAPSHOT.jar b/pdbbind/java/cdk-2.3-SNAPSHOT.jar new file mode 100644 index 0000000..1c01c94 Binary files /dev/null and b/pdbbind/java/cdk-2.3-SNAPSHOT.jar differ diff --git a/pdbbind/python/.DS_Store b/pdbbind/python/.DS_Store new file mode 100644 index 0000000..de8af17 Binary files /dev/null and b/pdbbind/python/.DS_Store differ diff --git a/pdbbind/python/Elements.java b/pdbbind/python/Elements.java new file mode 100755 index 0000000..070bb3a --- /dev/null +++ b/pdbbind/python/Elements.java @@ -0,0 +1,509 @@ +/* + * Copyright (C) 2006-2012 Egon Willighagen + * 2014 Mark B Vine (orcid:0000-0002-7794-0426) + * + * Contact: cdk-devel@lists.sourceforge.net + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * All we ask is that proper credit is given for our work, which includes + * - but is not limited to - adding the above copyright notice to the beginning + * of your source code files, and to any copyright notice that you may distribute + * with programs based on this work. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ +package org.openscience.cdk.config; + +import org.openscience.cdk.interfaces.IElement; + +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; + +/** + * Enumeration of chemical elements. Data is taken from the Blue Obelisk Data + * Repository, version 3. This enumeration is auto-generated with utilities + * found in the 'cdk-build-utils' project. + * + * @author egonw + * @author john may + * @cdk.module core + * @cdk.githash + */ +public enum Elements { + Unknown(0, "", 0, 0, null, 0.00, null), + Hydrogen(1, "H", 1, 1, 0.37, 1.20, 2.20), + Helium(2, "He", 1, 18, 0.32, 1.40, null), + Lithium(3, "Li", 2, 1, 1.34, 2.20, 0.98), + Beryllium(4, "Be", 2, 2, 0.90, 1.90, 1.57), + Boron(5, "B", 2, 13, 0.82, 1.80, 2.04), + Carbon(6, "C", 2, 14, 0.77, 1.70, 2.55), + Nitrogen(7, "N", 2, 15, 0.75, 1.60, 3.04), + Oxygen(8, "O", 2, 16, 0.73, 1.55, 3.44), + Fluorine(9, "F", 2, 17, 0.71, 1.50, 3.98), + Neon(10, "Ne", 2, 18, 0.69, 1.54, null), + Sodium(11, "Na", 3, 1, 1.54, 2.40, 0.93), + Magnesium(12, "Mg", 3, 2, 1.30, 2.20, 1.31), + Aluminium(13, "Al", 3, 13, 1.18, 2.10, 1.61), + Silicon(14, "Si", 3, 14, 1.11, 2.10, 1.90), + Phosphorus(15, "P", 3, 15, 1.06, 1.95, 2.19), + Sulfur(16, "S", 3, 16, 1.02, 1.80, 2.58), + Chlorine(17, "Cl", 3, 17, 0.99, 1.80, 3.16), + Argon(18, "Ar", 3, 18, 0.97, 1.88, null), + Potassium(19, "K", 4, 1, 1.96, 2.80, 0.82), + Calcium(20, "Ca", 4, 2, 1.74, 2.40, 1.00), + Scandium(21, "Sc", 4, 3, 1.44, 2.30, 1.36), + Titanium(22, "Ti", 4, 4, 1.36, 2.15, 
1.54), + Vanadium(23, "V", 4, 5, 1.25, 2.05, 1.63), + Chromium(24, "Cr", 4, 6, 1.27, 2.05, 1.66), + Manganese(25, "Mn", 4, 7, 1.39, 2.05, 1.55), + Iron(26, "Fe", 4, 8, 1.25, 2.05, 1.83), + Cobalt(27, "Co", 4, 9, 1.26, 2.0, 1.88), + Nickel(28, "Ni", 4, 10, 1.21, 2.0, 1.91), + Copper(29, "Cu", 4, 11, 1.38, 2.0, 1.90), + Zinc(30, "Zn", 4, 12, 1.31, 2.10, 1.65), + Gallium(31, "Ga", 4, 13, 1.26, 2.10, 1.81), + Germanium(32, "Ge", 4, 14, 1.22, 2.10, 2.01), + Arsenic(33, "As", 4, 15, 1.19, 2.05, 2.18), + Selenium(34, "Se", 4, 16, 1.16, 1.90, 2.55), + Bromine(35, "Br", 4, 17, 1.14, 1.90, 2.96), + Krypton(36, "Kr", 4, 18, 1.10, 2.02, 3.00), + Rubidium(37, "Rb", 5, 1, 2.11, 2.90, 0.82), + Strontium(38, "Sr", 5, 2, 1.92, 2.55, 0.95), + Yttrium(39, "Y", 5, 3, 1.62, 2.40, 1.22), + Zirconium(40, "Zr", 5, 4, 1.48, 2.30, 1.33), + Niobium(41, "Nb", 5, 5, 1.37, 2.15, 1.60), + Molybdenum(42, "Mo", 5, 6, 1.45, 2.10, 2.16), + Technetium(43, "Tc", 5, 7, 1.56, 2.05, 1.90), + Ruthenium(44, "Ru", 5, 8, 1.26, 2.05, 2.20), + Rhodium(45, "Rh", 5, 9, 1.35, 2.0, 2.28), + Palladium(46, "Pd", 5, 10, 1.31, 2.05, 2.20), + Silver(47, "Ag", 5, 11, 1.53, 2.10, 1.93), + Cadmium(48, "Cd", 5, 12, 1.48, 2.20, 1.69), + Indium(49, "In", 5, 13, 1.44, 2.20, 1.78), + Tin(50, "Sn", 5, 14, 1.41, 2.25, 1.96), + Antimony(51, "Sb", 5, 15, 1.38, 2.20, 2.05), + Tellurium(52, "Te", 5, 16, 1.35, 2.10, 2.10), + Iodine(53, "I", 5, 17, 1.33, 2.10, 2.66), + Xenon(54, "Xe", 5, 18, 1.30, 2.16, 2.60), + Caesium(55, "Cs", 6, 1, 2.25, 3.00, 0.79), + Barium(56, "Ba", 6, 2, 1.98, 2.70, 0.89), + Lanthanum(57, "La", 6, 3, 1.69, 2.50, 1.10), + Cerium(58, "Ce", 6, 0, null, 2.48, 1.12), + Praseodymium(59, "Pr", 6, 0, null, 2.47, 1.13), + Neodymium(60, "Nd", 6, 0, null, 2.45, 1.14), + Promethium(61, "Pm", 6, 0, null, 2.43, null), + Samarium(62, "Sm", 6, 0, null, 2.42, 1.17), + Europium(63, "Eu", 6, 0, 2.40, 2.40, null), + Gadolinium(64, "Gd", 6, 0, null, 2.38, 1.20), + Terbium(65, "Tb", 6, 0, null, 2.37, null), + Dysprosium(66, "Dy", 
6, 0, null, 2.35, 1.22), + Holmium(67, "Ho", 6, 0, null, 2.33, 1.23), + Erbium(68, "Er", 6, 0, null, 2.32, 1.24), + Thulium(69, "Tm", 6, 0, null, 2.30, 1.25), + Ytterbium(70, "Yb", 6, 0, null, 2.28, null), + Lutetium(71, "Lu", 6, 0, 1.60, 2.27, 1.27), + Hafnium(72, "Hf", 6, 4, 1.50, 2.25, 1.30), + Tantalum(73, "Ta", 6, 5, 1.38, 2.20, 1.50), + Tungsten(74, "W", 6, 6, 1.46, 2.10, 2.36), + Rhenium(75, "Re", 6, 7, 1.59, 2.05, 1.90), + Osmium(76, "Os", 6, 8, 1.28, 2.0, 2.20), + Iridium(77, "Ir", 6, 9, 1.37, 2.0, 2.20), + Platinum(78, "Pt", 6, 10, 1.28, 2.05, 2.28), + Gold(79, "Au", 6, 11, 1.44, 2.10, 2.54), + Mercury(80, "Hg", 6, 12, 1.49, 2.05, 2.00), + Thallium(81, "Tl", 6, 13, 1.48, 2.20, 1.62), + Lead(82, "Pb", 6, 14, 1.47, 2.30, 2.33), + Bismuth(83, "Bi", 6, 15, 1.46, 2.30, 2.02), + Polonium(84, "Po", 6, 16, 1.46, null, 2.00), + Astatine(85, "At", 6, 17, null, null, 2.20), + Radon(86, "Rn", 6, 18, 1.45, null, null), + Francium(87, "Fr", 7, 1, null, null, 0.70), + Radium(88, "Ra", 7, 2, null, null, 0.90), + Actinium(89, "Ac", 7, 3, null, null, 1.10), + Thorium(90, "Th", 7, 0, null, 2.40, 1.30), + Protactinium(91, "Pa", 7, 0, null, null, 1.50), + Uranium(92, "U", 7, 0, null, 2.30, 1.38), + Neptunium(93, "Np", 7, 0, null, null, 1.36), + Plutonium(94, "Pu", 7, 0, null, null, 1.28), + Americium(95, "Am", 7, 0, null, null, 1.30), + Curium(96, "Cm", 7, 0, null, null, 1.30), + Berkelium(97, "Bk", 7, 0, null, null, 1.30), + Californium(98, "Cf", 7, 0, null, null, 1.30), + Einsteinium(99, "Es", 7, 0, null, null, 1.30), + Fermium(100, "Fm", 7, 0, null, null, 1.30), + Mendelevium(101, "Md", 7, 0, null, null, 1.30), + Nobelium(102, "No", 7, 0, null, null, 1.30), + Lawrencium(103, "Lr", 7, 0, null, null, null), + Rutherfordium(104, "Rf", 7, 4, null, null, null), + Dubnium(105, "Db", 7, 5, null, null, null), + Seaborgium(106, "Sg", 7, 6, null, null, null), + Bohrium(107, "Bh", 7, 7, null, null, null), + Hassium(108, "Hs", 7, 8, null, null, null), + Meitnerium(109, "Mt", 7, 9, 
null, null, null), + Darmstadtium(110, "Ds", 7, 10, null, null, null), + Roentgenium(111, "Rg", 7, 11, null, null, null), + Copernicium(112, "Cn", 7, 12, null, null, null), + @Deprecated + Ununtrium(113, "Uut", 7, 13, null, null, null), + Nihonium(113, "Nh", 7, 13, null, null, null), + Flerovium(114, "Fl", 7, 14, null, null, null), + @Deprecated + Ununpentium(115, "Uup", 7, 15, null, null, null), + Moscovium(115, "Mc", 7, 15, null, null, null), + Livermorium(116, "Lv", 7, 16, null, null, null), + @Deprecated + Ununseptium(117, "Uus", 7, 17, null, null, null), + Tennessine(117, "Ts", 7, 17, null, null, null), + @Deprecated + Ununoctium(118, "Uuo", 7, 18, null, null, null), + Oganesson(118, "Og", 7, 18, null, null, null); + + /** + * Atomic number, periodic table period and group. + */ + private final int number, period, group; + + /** + * The symbol of the element. + */ + private final String symbol; + + /** + * Covalent radius (rcov), van der Waals radius + * (rw) and Pauling electronegativity. + */ + private final Double rCov, rW, electronegativity; + + /** + * An {@link IElement} instance of this element. + */ + private final IElement instance; + + /** + * Lookup elements by atomic number. + */ + static final Elements[] NUMER_MAP = new Elements[119]; + + /** + * Lookup elements by symbol / name. 
+ */ + static final Map SYMBOL_MAP = new HashMap(400); + + static { + // index elements + for (final Elements e : values()) { + NUMER_MAP[e.number] = e; + SYMBOL_MAP.put(e.symbol.toLowerCase(Locale.ENGLISH), e); + SYMBOL_MAP.put(e.name().toLowerCase(Locale.ENGLISH), e); + } + + // recently named elements + SYMBOL_MAP.put("uub", Copernicium); // 2009 + SYMBOL_MAP.put("ununbium", Copernicium); + + SYMBOL_MAP.put("uuq", Flerovium); // 2012 + SYMBOL_MAP.put("ununquadium", Flerovium); + + SYMBOL_MAP.put("uuh", Livermorium); // 2012 + SYMBOL_MAP.put("ununhexium", Livermorium); + + // 2016 + SYMBOL_MAP.put("uut", Nihonium); + SYMBOL_MAP.put("uup", Moscovium); + SYMBOL_MAP.put("uus", Tennessine); + SYMBOL_MAP.put("uuo", Oganesson); + + // alternative spellings + SYMBOL_MAP.put("sulphur", Sulfur); + SYMBOL_MAP.put("cesium", Caesium); + SYMBOL_MAP.put("aluminum", Aluminium); + + } + + /** + * Internal constructor. + * + * @param number atomic number + * @param symbol symbol + * @param period periodic table period + * @param group periodic table group + * @param rCov covalent radius + * @param rW van der Waals radius + * @param electronegativity pauling electronegativity + */ + private Elements(int number, String symbol, int period, int group, Double rCov, Double rW, Double electronegativity) { + this.number = number; + this.period = period; + this.group = group; + this.symbol = symbol; + this.rCov = rCov; + this.rW = rW; + this.electronegativity = electronegativity; + this.instance = new NaturalElement(symbol, number); + } + + /** + * The atomic number of the element. An {@link #Unknown} element + * has an atomic number of '0'. + * + * @return 0 - 116 + */ + public int number() { + return number; + } + + /** + * The element symbol, C for carbon, N for nitrogen, Na for sodium, etc. An + * {@link #Unknown} element has no symbol. 
+ * + * @return the symbol + */ + public String symbol() { + return symbol; + } + + /** + * Return the period in the periodic table this element belongs to. If + * the element is {@link #Unknown} it's period is 0. + * + * @return a period in the periodic table + */ + public int period() { + return period; + } + + /** + * Return the group in the periodic table this element belongs to. If + * the element does not belong to a group then it's group is '0'. + * + * @return a group in the periodic table + */ + public int group() { + return group; + } + + /** + * The covalent radius, rcov, is a measure of the + * size of an atom that forms part of one covalent bond. + * + * @return covalent radius - null if not available + * @see Covalent radius + */ + public Double covalentRadius() { + return rCov; + } + + /** + * The van der Waals radius, rw, of an atom is the + * radius of an imaginary hard sphere which can be used to model the + * atom. + * + * @return van der Waals radius - null if not available + * @see Van de Waals radius + */ + public Double vdwRadius() { + return rW; + } + + /** + * Electronegativity, symbol χ, is a chemical property that describes + * the tendency of an atom or a functional group to attract electrons + * (or electron density) towards itself. This method provides access to the + * Pauling electronegativity value for a chemical element. If no value is + * available 'null' is returned. + * + * @return Pauling electronegativity - null if not available + * @see Pauling Electronegativity + */ + public Double electronegativity() { + return electronegativity; + } + + /** + * Access an {@link IElement} instance of the chemical element. + * + * @return an instance + */ + public IElement toIElement() { + return instance; + } + + /** + * Obtain the element with the specified atomic number. If no element had + * the specified atomic number then {@link #Unknown} is returned. + * + *
+     *     // carbon
+     *     Elements e = Elements.ofNumber(6);
+     *
+     *     // oxygen
+     *     Elements e = Elements.ofNumber(8);
+     * 
+ * + * @param number atomic number + * @return an element, or {@link #Unknown} + */ + public static Elements ofNumber(final int number) { + if (number < 0 || number > 118) return Unknown; + return NUMER_MAP[number]; + } + + /** + * Obtain the element with the specified symbol or name. If no element had + * the specified symbol or name then {@link #Unknown} is returned. The + * input is case-insensitive. + * + *
+     *     // carbon
+     *     Elements e = Elements.ofString("c");
+     *     Elements e = Elements.ofString("C");
+     *     Elements e = Elements.ofString("Carbon");
+     *     Elements e = Elements.ofString("carbon");
+     * 
+ * + * @param str input string + * @return an element, or {@link #Unknown} + */ + public static Elements ofString(final String str) { + if (str == null) return Unknown; + Elements e = SYMBOL_MAP.get(str.toLowerCase(Locale.ENGLISH)); + if (e == null) return Unknown; + return e; + } + + /** These instances are for backards compatability. */ + public final static IElement DUMMY = Unknown.toIElement(); + public final static IElement HYDROGEN = Hydrogen.toIElement(); + public final static IElement HELIUM = Helium.toIElement(); + public final static IElement LITHIUM = Lithium.toIElement(); + public final static IElement BERYLLIUM = Beryllium.toIElement(); + public final static IElement BORON = Boron.toIElement(); + public final static IElement CARBON = Carbon.toIElement(); + public final static IElement NITROGEN = Nitrogen.toIElement(); + public final static IElement OXYGEN = Oxygen.toIElement(); + public final static IElement FLUORINE = Fluorine.toIElement(); + public final static IElement NEON = Neon.toIElement(); + public final static IElement SODIUM = Sodium.toIElement(); + public final static IElement MAGNESIUM = Magnesium.toIElement(); + public final static IElement ALUMINIUM = Aluminium.toIElement(); + public final static IElement SILICON = Silicon.toIElement(); + public final static IElement PHOSPHORUS = Phosphorus.toIElement(); + public final static IElement SULFUR = Sulfur.toIElement(); + public final static IElement CHLORINE = Chlorine.toIElement(); + public final static IElement ARGON = Argon.toIElement(); + public final static IElement POTASSIUM = Potassium.toIElement(); + public final static IElement CALCIUM = Calcium.toIElement(); + public final static IElement SCANDIUM = Scandium.toIElement(); + public final static IElement TITANIUM = Titanium.toIElement(); + public final static IElement VANADIUM = Vanadium.toIElement(); + public final static IElement CHROMIUM = Chromium.toIElement(); + public final static IElement MANGANESE = Manganese.toIElement(); + 
public final static IElement IRON = Iron.toIElement(); + public final static IElement COBALT = Cobalt.toIElement(); + public final static IElement NICKEL = Nickel.toIElement(); + public final static IElement COPPER = Copper.toIElement(); + public final static IElement ZINC = Zinc.toIElement(); + public final static IElement GALLIUM = Gallium.toIElement(); + public final static IElement GERMANIUM = Germanium.toIElement(); + public final static IElement ARSENIC = Arsenic.toIElement(); + public final static IElement SELENIUM = Selenium.toIElement(); + public final static IElement BROMINE = Bromine.toIElement(); + public final static IElement KRYPTON = Krypton.toIElement(); + public final static IElement RUBIDIUM = Rubidium.toIElement(); + public final static IElement STRONTIUM = Strontium.toIElement(); + public final static IElement YTTRIUM = Yttrium.toIElement(); + public final static IElement ZIRCONIUM = Zirconium.toIElement(); + public final static IElement NIOBIUM = Niobium.toIElement(); + public final static IElement MOLYBDENUM = Molybdenum.toIElement(); + public final static IElement TECHNETIUM = Technetium.toIElement(); + public final static IElement RUTHENIUM = Ruthenium.toIElement(); + public final static IElement RHODIUM = Rhodium.toIElement(); + public final static IElement PALLADIUM = Palladium.toIElement(); + public final static IElement SILVER = Silver.toIElement(); + public final static IElement CADMIUM = Cadmium.toIElement(); + public final static IElement INDIUM = Indium.toIElement(); + public final static IElement TIN = Tin.toIElement(); + public final static IElement ANTIMONY = Antimony.toIElement(); + public final static IElement TELLURIUM = Tellurium.toIElement(); + public final static IElement IODINE = Iodine.toIElement(); + public final static IElement XENON = Xenon.toIElement(); + public final static IElement CAESIUM = Caesium.toIElement(); + public final static IElement BARIUM = Barium.toIElement(); + public final static IElement LANTHANUM = 
Lanthanum.toIElement(); + public final static IElement CERIUM = Cerium.toIElement(); + public final static IElement PRASEODYMIUM = Praseodymium.toIElement(); + public final static IElement NEODYMIUM = Neodymium.toIElement(); + public final static IElement PROMETHIUM = Promethium.toIElement(); + public final static IElement SAMARIUM = Samarium.toIElement(); + public final static IElement EUROPIUM = Europium.toIElement(); + public final static IElement GADOLINIUM = Gadolinium.toIElement(); + public final static IElement TERBIUM = Terbium.toIElement(); + public final static IElement DYSPROSIUM = Dysprosium.toIElement(); + public final static IElement HOLMIUM = Holmium.toIElement(); + public final static IElement ERBIUM = Erbium.toIElement(); + public final static IElement THULIUM = Thulium.toIElement(); + public final static IElement YTTERBIUM = Ytterbium.toIElement(); + public final static IElement LUTETIUM = Lutetium.toIElement(); + public final static IElement HAFNIUM = Hafnium.toIElement(); + public final static IElement TANTALUM = Tantalum.toIElement(); + public final static IElement TUNGSTEN = Tungsten.toIElement(); + public final static IElement RHENIUM = Rhenium.toIElement(); + public final static IElement OSMIUM = Osmium.toIElement(); + public final static IElement IRIDIUM = Iridium.toIElement(); + public final static IElement PLATINUM = Platinum.toIElement(); + public final static IElement GOLD = Gold.toIElement(); + public final static IElement MERCURY = Mercury.toIElement(); + public final static IElement THALLIUM = Thallium.toIElement(); + public final static IElement LEAD = Lead.toIElement(); + public final static IElement BISMUTH = Bismuth.toIElement(); + public final static IElement POLONIUM = Polonium.toIElement(); + public final static IElement ASTATINE = Astatine.toIElement(); + public final static IElement RADON = Radon.toIElement(); + public final static IElement FRANCIUM = Francium.toIElement(); + public final static IElement RADIUM = 
Radium.toIElement(); + public final static IElement ACTINIUM = Actinium.toIElement(); + public final static IElement THORIUM = Thorium.toIElement(); + public final static IElement PROTACTINIUM = Protactinium.toIElement(); + public final static IElement URANIUM = Uranium.toIElement(); + public final static IElement NEPTUNIUM = Neptunium.toIElement(); + public final static IElement PLUTONIUM = Plutonium.toIElement(); + public final static IElement AMERICIUM = Americium.toIElement(); + public final static IElement CURIUM = Curium.toIElement(); + public final static IElement BERKELIUM = Berkelium.toIElement(); + public final static IElement CALIFORNIUM = Californium.toIElement(); + public final static IElement EINSTEINIUM = Einsteinium.toIElement(); + public final static IElement FERMIUM = Fermium.toIElement(); + public final static IElement MENDELEVIUM = Mendelevium.toIElement(); + public final static IElement NOBELIUM = Nobelium.toIElement(); + public final static IElement LAWRENCIUM = Lawrencium.toIElement(); + public final static IElement RUTHERFORDIUM = Rutherfordium.toIElement(); + public final static IElement DUBNIUM = Dubnium.toIElement(); + public final static IElement SEABORGIUM = Seaborgium.toIElement(); + public final static IElement BOHRIUM = Bohrium.toIElement(); + public final static IElement HASSIUM = Hassium.toIElement(); + public final static IElement MEITNERIUM = Meitnerium.toIElement(); + public final static IElement DARMSTADTIUM = Darmstadtium.toIElement(); + public final static IElement ROENTGENIUM = Roentgenium.toIElement(); + public final static IElement UNUNBIUM = Copernicium.toIElement(); + public final static IElement UNUNTRIUM = Ununtrium.toIElement(); + public final static IElement UNUNQUADIUM = Flerovium.toIElement(); + public final static IElement FLEROVIUM = Flerovium.toIElement(); + public final static IElement UNUNPENTIUM = Ununpentium.toIElement(); + public final static IElement UNUNHEXIUM = Livermorium.toIElement(); + public final 
def get_cdk_vdwr_dic(file='../../../python/Elements.java'):
    """Extract van der Waals radii from the CDK ``Elements.java`` file.

    The parser scans the lines following ``public enum Elements {`` and, for
    every enum entry of the form ``Name(number, ..., radius, x),``, reads the
    atomic number (first argument) and the radius (third comma-separated
    field counted from the end of the line, i.e. including the field after
    the trailing comma).

    Parameters
    ----------
    file: str, optional
        Path to ``Elements.java``. The default is relative to the current
        working directory.

    Returns
    -------
    cdk_vdwr_dic: dict
        Maps atomic number (int) to radius (float). Entries whose radius is
        ``null`` are left out.
    """
    cdk_vdwr_dic = {}
    in_enum = False
    with open(file, 'r') as f:
        for line in f:
            if not in_enum:
                # Everything before the enum header is ignored.
                in_enum = 'public enum Elements {' in line
                continue
            if '@Deprecated' in line:
                continue

            # Blank lines and comments carry no '(' and are skipped instead
            # of raising IndexError.
            _, paren, arguments = line.partition('(')
            if paren:
                row = arguments.split(',')
                try:
                    atomic_number = int(row[0])
                    radius = row[-3]
                except (ValueError, IndexError):
                    # Parenthesised text that is not an enum entry.
                    pass
                else:
                    if 'null' not in radius:
                        cdk_vdwr_dic[atomic_number] = float(radius)

            # Oganesson is the last enum entry; stop before the class body.
            if 'Oganesson' in line:
                break
    return cdk_vdwr_dic
Features can encode atom type, + native pybel properties or any property defined with SMARTS patterns + + Attributes + ---------- + FEATURE_NAMES: list of strings + Labels for features (in the same order as features) + NUM_ATOM_CLASSES: int + Number of atom codes + ATOM_CODES: dict + Dictionary mapping atomic numbers to codes + NAMED_PROPS: list of string + Names of atomic properties to retrieve from pybel.Atom object + CALLABLES: list of callables + Callables used to calculcate custom atomic properties + SMARTS: list of SMARTS strings + SMARTS patterns defining additional atomic properties + """ + + def __init__(self, atom_codes=None, atom_labels=None, + named_properties=None, save_molecule_codes=True, + custom_properties=None, smarts_properties=None, + smarts_labels=None): + + """Creates Featurizer with specified types of features. Elements of a + feature vector will be in a following order: atom type encoding + (defined by atom_codes), Pybel atomic properties (defined by + named_properties), molecule code (if present), custom atomic properties + (defined `custom_properties`), and additional properties defined with + SMARTS (defined with `smarts_properties`). + + Parameters + ---------- + atom_codes: dict, optional + Dictionary mapping atomic numbers to codes. It will be used for + one-hot encoging therefore if n different types are used, codes + shpuld be from 0 to n-1. Multiple atoms can have the same code, + e.g. you can use {6: 0, 7: 1, 8: 1} to encode carbons with [1, 0] + and nitrogens and oxygens with [0, 1] vectors. If not provided, + default encoding is used. + atom_labels: list of strings, optional + Labels for atoms codes. It should have the same length as the + number of used codes, e.g. for `atom_codes={6: 0, 7: 1, 8: 1}` you + should provide something like ['C', 'O or N']. If not specified + labels 'atom0', 'atom1' etc are used. If `atom_codes` is not + specified this argument is ignored. 
+ named_properties: list of strings, optional + Names of atomic properties to retrieve from pybel.Atom object. If + not specified ['hyb', 'heavyvalence', 'heterovalence', + 'partialcharge'] is used. + save_molecule_codes: bool, optional (default True) + If set to True, there will be an additional feature to save + molecule code. It is usefeul when saving molecular complex in a + single array. + custom_properties: list of callables, optional + Custom functions to calculate atomic properties. Each element of + this list should be a callable that takes pybel.Atom object and + returns a float. If callable has `__name__` property it is used as + feature label. Otherwise labels 'func' etc are used, where i is + the index in `custom_properties` list. + smarts_properties: list of strings, optional + Additional atomic properties defined with SMARTS patterns. These + patterns should match a single atom. If not specified, deafult + patterns are used. + smarts_labels: list of strings, optional + Labels for properties defined with SMARTS. Should have the same + length as `smarts_properties`. If not specified labels 'smarts0', + 'smarts1' etc are used. If `smarts_properties` is not specified + this argument is ignored. 
+ """ + + # Remember namse of all features in the correct order + self.FEATURE_NAMES = [] + + if atom_codes is not None: + if not isinstance(atom_codes, dict): + raise TypeError('Atom codes should be dict, got %s instead' + % type(atom_codes)) + codes = set(atom_codes.values()) + for i in range(len(codes)): + if i not in codes: + raise ValueError('Incorrect atom code %s' % i) + + self.NUM_ATOM_CLASSES = len(codes) + self.ATOM_CODES = atom_codes + if atom_labels is not None: + if len(atom_labels) != self.NUM_ATOM_CLASSES: + raise ValueError('Incorrect number of atom labels: ' + '%s instead of %s' + % (len(atom_labels), self.NUM_ATOM_CLASSES)) + else: + atom_labels = ['atom%s' % i for i in range(self.NUM_ATOM_CLASSES)] + self.FEATURE_NAMES += atom_labels + else: + self.ATOM_CODES = {} + + metals = ([3, 4, 11, 12, 13] + list(range(19, 32)) + + list(range(37, 51)) + list(range(55, 84)) + + list(range(87, 104))) + + # List of tuples (atomic_num, class_name) with atom types to encode. + atom_classes = [ + (1, 'H'), # QQ add: H also considered in surface. 
+ (5, 'B'), + (6, 'C'), + (7, 'N'), + (8, 'O'), + (15, 'P'), + (16, 'S'), + (34, 'Se'), + ([9, 17, 35, 53], 'halogen'), + (metals, 'metal') + ] + + for code, (atom, name) in enumerate(atom_classes): + if type(atom) is list: + for a in atom: + self.ATOM_CODES[a] = code + else: + self.ATOM_CODES[atom] = code + self.FEATURE_NAMES.append(name) + + self.NUM_ATOM_CLASSES = len(atom_classes) + + if named_properties is not None: + if not isinstance(named_properties, (list, tuple, np.ndarray)): + raise TypeError('named_properties must be a list') + allowed_props = [prop for prop in dir(pybel.Atom) + if not prop.startswith('__')] + for prop_id, prop in enumerate(named_properties): + if prop not in allowed_props: + raise ValueError( + 'named_properties must be in pybel.Atom attributes,' + ' %s was given at position %s' % (prop_id, prop) + ) + self.NAMED_PROPS = named_properties + else: + # pybel.Atom properties to save + self.NAMED_PROPS = ['hyb', 'heavyvalence', 'heterovalence', + 'partialcharge', 'radius'] + self.FEATURE_NAMES += self.NAMED_PROPS + + if not isinstance(save_molecule_codes, bool): + raise TypeError('save_molecule_codes should be bool, got %s ' + 'instead' % type(save_molecule_codes)) + self.save_molecule_codes = save_molecule_codes + if save_molecule_codes: + # Remember if an atom belongs to the ligand or to the protein + self.FEATURE_NAMES.append('molcode') + + self.CALLABLES = [] + if custom_properties is not None: + for i, func in enumerate(custom_properties): + if not callable(func): + raise TypeError('custom_properties should be list of' + ' callables, got %s instead' % type(func)) + name = getattr(func, '__name__', '') + if name == '': + name = 'func%s' % i + self.CALLABLES.append(func) + self.FEATURE_NAMES.append(name) + + if smarts_properties is None: + # SMARTS definition for other properties + self.SMARTS = [ + '[#6+0!$(*~[#7,#8,F]),SH0+0v2,s+0,S^3,Cl+0,Br+0,I+0]', + '[a]', + '[!$([#1,#6,F,Cl,Br,I,o,s,nX3,#7v5,#15v5,#16v4,#16v6,*+1,*+2,*+3])]', + 
def encode_num(self, atomic_num):
    """Encode atom type with a binary vector.

    If the atom type is not present in ``self.ATOM_CODES`` its encoding is an
    all-zeros vector.

    Parameters
    ----------
    atomic_num: int
        Atomic number

    Returns
    -------
    encoding: np.ndarray
        Binary vector of length ``self.NUM_ATOM_CLASSES`` encoding the atom
        type (one-hot or null).

    Raises
    ------
    TypeError
        If `atomic_num` is not an ``int``.
    """

    if not isinstance(atomic_num, int):
        raise TypeError('Atomic number must be int, %s was given'
                        % type(atomic_num))

    encoding = np.zeros(self.NUM_ATOM_CLASSES)
    # Only an unknown atomic number yields the null vector; any other failure
    # (e.g. a code outside the vector) is a real bug and is no longer hidden
    # by a bare ``except``.
    code = self.ATOM_CODES.get(atomic_num)
    if code is not None:
        encoding[code] = 1.0
    return encoding
def get_features(self, molecule, molcode=None):
    """Get coordinates and features for all atoms in the molecule.

    Every atom yielded by iterating `molecule` is kept (the earlier
    heavy-atom filter is disabled), so hydrogens are included.

    Parameters
    ----------
    molecule: pybel.Molecule
    molcode: float, optional
        Molecule type. You can use it to encode whether an atom belongs to
        the ligand or to the protein etc. Required when
        ``self.save_molecule_codes`` is True.

    Returns
    -------
    coords: np.ndarray, shape = (N, 3)
        Coordinates of all atoms in the `molecule`.
    features: np.ndarray, shape = (N, F)
        Features of all atoms in the `molecule`: atom type (one-hot
        encoding), the properties listed in ``self.NAMED_PROPS``, results of
        ``self.CALLABLES``, type of a molecule (if enabled), and the
        properties defined with SMARTS patterns.

    Raises
    ------
    TypeError
        If `molecule` is not a pybel.Molecule or `molcode` is not a number.
    ValueError
        If `molcode` is missing while molecule codes are enabled.
    KeyError
        If the 'radius' property is requested for an element that has no
        entry in ``cdk_vdwr_dic``.
    RuntimeError
        If any computed feature is NaN.
    """

    if not isinstance(molecule, pybel.Molecule):
        raise TypeError('molecule must be pybel.Molecule object,'
                        ' %s was given' % type(molecule))
    if molcode is None:
        if self.save_molecule_codes is True:
            raise ValueError('save_molecule_codes is set to True,'
                             ' you must specify code for the molecule')
    elif not isinstance(molcode, (float, int)):
        raise TypeError('molcode must be float, %s was given'
                        % type(molcode))

    # Feature labels whose pybel.Atom attribute has a different name: the
    # labels are kept for the feature table, the values are read from
    # atom.heavydegree / atom.heterodegree.
    attribute_for = {'heavyvalence': 'heavydegree',
                     'heterovalence': 'heterodegree'}

    coords = []
    features = []
    atom_indices = []

    for i, atom in enumerate(molecule):
        atom_indices.append(i)
        coords.append(atom.coords)

        atom_attributes = []
        for prop in self.NAMED_PROPS:
            if prop == 'radius':
                # Radius comes from the CDK table, not from pybel.
                try:
                    value = cdk_vdwr_dic[atom.atomicnum]
                except KeyError:
                    raise KeyError('No van der Waals radius available for '
                                   'atomic number %s' % atom.atomicnum) from None
            else:
                # Any other named property is read straight from the atom so
                # that the feature columns always match FEATURE_NAMES.
                value = getattr(atom, attribute_for.get(prop, prop))
            atom_attributes.append(value)

        features.append(np.concatenate((
            self.encode_num(atom.atomicnum),
            atom_attributes,
            [func(atom) for func in self.CALLABLES],
        )))

    coords = np.array(coords, dtype=np.float32)
    features = np.array(features, dtype=np.float32)
    if self.save_molecule_codes:
        features = np.hstack((features,
                              molcode * np.ones((len(features), 1))))

    features = np.hstack([features,
                          self.find_smarts(molecule)[atom_indices]])

    if np.isnan(features).any():
        raise RuntimeError('Got NaN when calculating features')

    return coords, features
def clean_index_for_file(folder):
    """Drop CONECT records that reference atom index 0 from a pocket file.

    The file ``<folder>/<id>_pocket.pdb`` is read, where ``<id>`` is the last
    path component of `folder`. It is rewritten in place only when at least
    one record was dropped.

    Parameters
    ----------
    folder: str
        Path of the complex folder, using '/' separators. A trailing '/' is
        tolerated.

    Returns
    -------
    str or None
        The complex id if the file was rewritten, otherwise None.
    """
    folder = folder.rstrip('/')
    complex_id = folder.split('/')[-1]
    pdb_path = folder + '/{}_pocket.pdb'.format(complex_id)

    kept_lines = []
    rewrite = False
    with open(pdb_path, 'r') as f:
        for row in f:
            # Only genuine CONECT records are candidates; other records that
            # merely mention "CONECT" (e.g. remarks) must be preserved.
            if row.startswith('CONECT') and ' 0' in row:
                rewrite = True
                continue
            kept_lines.append(row)

    if not rewrite:
        return None

    print('Need clean index error: ', complex_id)
    with open(pdb_path, 'w') as f:
        f.writelines(kept_lines)
    return complex_id
def read_affinity(file_name, mode):
    """Read affinity labels from a PDBbind index file.

    The value used is the fourth whitespace-separated column of each data
    line (the log Kd/Ki value), which avoids having to interpret the
    ``<``, ``>`` and ``~`` qualifiers found in the raw Kd/Ki column.

    Lines containing ``#`` or ``===`` and blank lines are skipped.

    Parameters
    ----------
    file_name: str
        Path to the index file.
    mode: str
        ``'reg'`` stores the affinity itself. Any other value stores a class
        label: 1 if the affinity is above ``6 + log10(5)``, 0 if it is below
        ``6 - log10(5)``; complexes in between are reported and left out.

    Returns
    -------
    record_dic: dict
        Maps complex id (first column) to affinity or class label.
    """
    upper = 6 + math.log10(5)
    lower = 6 - math.log10(5)

    record_dic = {}
    with open(file_name, 'r') as data_fid:
        for line in data_fid:
            if '#' in line or '===' in line:
                continue
            # split() collapses any run of whitespace and returns [] for a
            # blank line (lines keep their newline, so len(line) is never 0).
            contents = line.split()
            if not contents:
                continue
            complex_id = contents[0]
            affinity = float(contents[3])
            if mode == 'reg':
                record_dic[complex_id] = affinity
            elif affinity > upper:
                record_dic[complex_id] = 1
            elif affinity < lower:
                record_dic[complex_id] = 0
            else:
                print('skip {}, affinity is {}.'.format(complex_id, affinity))
    return record_dic
def _write_octree_split(file_name, entries, depth, view_num):
    """Write one octree list file and return the labels written (one per view)."""
    labels = []
    with open(file_name, 'w') as f:
        for subset, complex_id, label in entries:
            for v in range(view_num):
                path = '{0}/{1}/octree_folder/{1}_points_{2}_2_{3:03d}.octree'.format(
                    subset, complex_id, depth, v)
                f.write('{} {}\n'.format(path, label))
                labels.append(label)
    return labels


def write_octree_list(general_dic, core_folders, refined_folders, depth=8,
                      mode='reg', view_num=24, root_dir=None,
                      general_ids=None, val_size=600):
    """Write the train/val/test octree list files for all PDBbind data.

    Test is the core set, validation is a random part of the refined set
    (fixed seed 2020), and training is the general set plus the rest of the
    refined set. Complexes without a label in `general_dic` are left out.
    Three files ``octree_list_{test,val,train}_<depth>_<mode>.txt`` are
    written under the root folder, one line per complex and view, and the
    size/mean/std of each label set is printed.

    Parameters
    ----------
    general_dic: dict
        Maps complex id to its label.
    core_folders: list of str
        Complex ids of the core set.
    refined_folders: list of str
        Complex ids of the refined set.
    depth: int, optional
        Octree depth used in the octree file names.
    mode: str, optional
        Task name used in the list file names.
    view_num: int, optional
        Number of octree views listed per complex.
    root_dir: str, optional
        Prefix for the output files (expected to end with '/'). Defaults to
        the module-level ``root_folder``.
    general_ids: list of str, optional
        Complex ids of the general set. Defaults to the module-level
        ``general_folders``.
    val_size: int, optional
        Number of refined complexes drawn for validation; capped at the
        size of the refined set so small data sets do not crash.
    """
    # Fall back to the globals the script defines under ``__main__`` so that
    # existing callers keep working.
    if root_dir is None:
        root_dir = root_folder
    if general_ids is None:
        general_ids = general_folders

    # test only includes the core set
    test_entries = [('coreset', cid, general_dic[cid])
                    for cid in core_folders if cid in general_dic]
    test_affinity = _write_octree_split(
        root_dir + 'octree_list_test_{}_{}.txt'.format(depth, mode),
        test_entries, depth, view_num)

    # validation is part of the refined set
    random.seed(2020)
    val_id = set(random.sample(refined_folders,
                               k=min(val_size, len(refined_folders))))
    val_entries = []
    for cid in refined_folders:
        if cid not in val_id:
            continue
        if cid not in general_dic:
            print('id {} not in general_dic'.format(cid))
            continue
        val_entries.append(('refined-set', cid, general_dic[cid]))
    val_affinity = _write_octree_split(
        root_dir + 'octree_list_val_{}_{}.txt'.format(depth, mode),
        val_entries, depth, view_num)

    # training is everything else from the general and refined sets
    refined_ids = set(refined_folders)
    train_entries = []
    for cid in list(general_ids) + list(refined_folders):
        if cid in val_id or cid not in general_dic:
            continue
        subset = 'refined-set' if cid in refined_ids else 'v2018-other-PL'
        train_entries.append((subset, cid, general_dic[cid]))
    train_affinity = _write_octree_split(
        root_dir + 'octree_list_train_{}_{}.txt'.format(depth, mode),
        train_entries, depth, view_num)

    train_affinity = np.array(train_affinity)
    val_affinity = np.array(val_affinity)
    test_affinity = np.array(test_affinity)
    print('train_affinity size: {}, mean: {}, std: {}'.format(train_affinity.shape[0], np.mean(train_affinity), np.std(train_affinity)))
    print('val_affinity size: {}, mean: {}, std: {}'.format(val_affinity.shape[0], np.mean(val_affinity), np.std(val_affinity)))
    print('test_affinity size: {}, mean: {}, std: {}'.format(test_affinity.shape[0], np.mean(test_affinity), np.std(test_affinity)))
'refined-set/{0}/{1}_points.points'.format(id, id) + else: + path = 'refined-set/{0}/{1}_points_{2}.points'.format(id, id, density) + else: + if density is None: + path = 'v2018-other-PL/{0}/{1}_points.points'.format(id, id) + else: + path = 'v2018-other-PL/{0}/{1}_points_{2}.points'.format(id, id, density) + + # print(path) + line = '{} {}\n'.format(path, general_dic[id]) + f.write(line) + train_affinity.append(general_dic[id]) + + train_affinity = np.array(train_affinity) + val_affinity = np.array(val_affinity) + test_affinity = np.array(test_affinity) + print('train_affinity size: {}, mean: {}, std: {}'.format(train_affinity.shape[0], np.mean(train_affinity), np.std(train_affinity))) + print('val_affinity size: {}, mean: {}, std: {}'.format(val_affinity.shape[0], np.mean(val_affinity), np.std(val_affinity))) + print('test_affinity size: {}, mean: {}, std: {}'.format(test_affinity.shape[0], np.mean(test_affinity), np.std(test_affinity))) + +if __name__ == "__main__": + root_folder = '../data_folder/' + + mode = 'reg' + + general_file = root_folder + 'v2018-other-PL/index/INDEX_general_PL_data.2018' + + general_folders = os.listdir(root_folder + 'v2018-other-PL') + refined_folders = os.listdir(root_folder + 'refined-set') + core_folders = os.listdir(root_folder + 'coreset') + + general_folders = filter(general_folders) + refined_folders = filter(refined_folders) + core_folders = filter(core_folders) + + print(len(general_folders), len(refined_folders), len(core_folders)) + print('Check general and core') + general_folders = check_duplicate(general_folders, core_folders) + print('Check general and refine') + general_folders = check_duplicate(general_folders, refined_folders) + print('Check refine and core') + refined_folders = check_duplicate(refined_folders, core_folders) + print(len(general_folders), len(refined_folders), len(core_folders)) + + core_id_list = core_folders + general_dic = read_affinity(general_file, mode = mode) + print(len(general_dic)) + + # 
check_dic_id(general_dic, general_folders, refined_folders, core_folders) + + # write_octree_list(general_dic, core_folders, refined_folders, depth = 5, mode = mode, view_num = 24) + write_points_list(general_dic, core_folders, refined_folders, mode=mode, density = 3) diff --git a/pdbbind/python/write_complex.py b/pdbbind/python/write_complex.py new file mode 100644 index 0000000..0d5b018 --- /dev/null +++ b/pdbbind/python/write_complex.py @@ -0,0 +1,15 @@ +import os + +path = os.getcwd() +complex_id = path.split('/')[-1] + + +# file_list is used to get .points from pdb points. +id_list_file = path + '/file_list.txt' +with open(id_list_file, 'w') as fw: + fw.write(complex_id) + +# point_list is used to get .octrees from .points +id_list_file = path + '/point_list.txt' +with open(id_list_file, 'w') as fw: + fw.write(path + '/' + complex_id + '_points.points') \ No newline at end of file diff --git a/tensorflow/.DS_Store b/tensorflow/.DS_Store index daa977a..2846d94 100644 Binary files a/tensorflow/.DS_Store and b/tensorflow/.DS_Store differ diff --git a/tensorflow/script/.DS_Store b/tensorflow/script/.DS_Store index 53c5fb4..c7139c2 100644 Binary files a/tensorflow/script/.DS_Store and b/tensorflow/script/.DS_Store differ diff --git a/tensorflow/script/configs/.DS_Store b/tensorflow/script/configs/.DS_Store index 5008ddf..a332610 100644 Binary files a/tensorflow/script/configs/.DS_Store and b/tensorflow/script/configs/.DS_Store differ diff --git a/tensorflow/script/configs/train_resnet_depth6.yaml b/tensorflow/script/configs/train_resnet_depth6.yaml new file mode 100644 index 0000000..882efbb --- /dev/null +++ b/tensorflow/script/configs/train_resnet_depth6.yaml @@ -0,0 +1,54 @@ +SOLVER: + gpu: 0, + logdir: ./logs/pdbbind/resnet_points_reg_6 + run: train + max_iter: 188000 #190437 #100 epochs for batch_size=8 + test_iter: 100 + test_every_iter: 4000 + step_size: (40000,) + learning_rate: 0.001 + task: reg + +DATA: + train: + dtype: points + distort: True + depth: 6 
+ full_depth: 2 + axis: xyz + angle: (180, 180, 180) + interval: (1, 1, 1) + scale: 0.0 + jitter: 0.125 + location: ../../pdbbind/data_folder/tfrecords/train_reg_points_den3.tfrecords + batch_size: 8 + x_alias: data + shuffle: 100 + dropout: (0, 0) + test: + dtype: points + distort: True + depth: 6 + full_depth: 2 + axis: xyz + angle: (12, 12, 12) + interval: (1, 1, 1) + scale: 0.0 + jitter: 0.0 + location: ../../pdbbind/data_folder/tfrecords/val_reg_points_den3.tfrecords + shuffle: 1 + batch_size: 6 + x_alias: data + +MODEL: + name: resnet #ocnn or resnet + channel: 24 + nout: 1 + depth: 6 + depth_out: 2 + dropout: (0.0,) + resblock_num: 3 + +LOSS: + num_class: 1 + weight_decay: 0.01 \ No newline at end of file