Global Ligand Docking using XMLObjects Using the ref2015.wts Scorefunction
这个Jupyter笔记本需要PyRosetta分布式层。请确保在运行此笔记本之前激活PyRosetta.notebooks conda环境。内核设置为使用此环境。
import logging
logging.basicConfig(level=logging.INFO)
import matplotlib
%matplotlib inline
import os
import pandas as pd
import pyrosetta
import pyrosetta.distributed.viewer as viewer
import seaborn
seaborn.set()
import sys
现在我们将score函数更改为ref2015.wts
ligand_params = "inputs/TPA.am1-bcc.fa.params"
flags = f"""
-ignore_unrecognized_res 1
-extra_res_fa {ligand_params}
"""
pyrosetta.distributed.init(flags)
pose = pyrosetta.io.pose_from_file(filename="inputs/test_lig.pdb")
scorefxn = pyrosetta.create_score_function("ref2015")
使用XmlObjects进行配体对接:
xml = pyrosetta.rosetta.protocols.rosetta_scripts.XmlObjects.create_from_string("""
<ROSETTASCRIPTS>
<SCOREFXNS>
<ScoreFunction name="fa_standard" weights="ref2015.wts"/>
</SCOREFXNS>
<RESIDUE_SELECTORS>
<Chain name="chX" chains="X"/>
</RESIDUE_SELECTORS>
<SIMPLE_METRICS>
<RMSDMetric name="rmsd_chX" residue_selector="chX" reference_name="store_native" residue_selector_ref="chX" robust="true" rmsd_type="rmsd_all" />
</SIMPLE_METRICS>
<SCORINGGRIDS ligand_chain="X" width="25">
<ClassicGrid grid_name="vdw" weight="1.0"/>
</SCORINGGRIDS>
<LIGAND_AREAS>
<LigandArea name="docking_sidechain_X" chain="X" cutoff="6.0" add_nbr_radius="true" all_atom_mode="true" minimize_ligand="10"/>
<LigandArea name="final_sidechain_X" chain="X" cutoff="6.0" add_nbr_radius="true" all_atom_mode="true"/>
<LigandArea name="final_backbone_X" chain="X" cutoff="7.0" add_nbr_radius="false" all_atom_mode="true" Calpha_restraints="0.3"/>
</LIGAND_AREAS>
<INTERFACE_BUILDERS>
<InterfaceBuilder name="side_chain_for_docking" ligand_areas="docking_sidechain_X"/>
<InterfaceBuilder name="side_chain_for_final" ligand_areas="final_sidechain_X"/>
<InterfaceBuilder name="backbone" ligand_areas="final_backbone_X" extension_window="3"/>
</INTERFACE_BUILDERS>
<MOVEMAP_BUILDERS>
<MoveMapBuilder name="docking" sc_interface="side_chain_for_docking" minimize_water="true"/>
<MoveMapBuilder name="final" sc_interface="side_chain_for_final" bb_interface="backbone" minimize_water="true"/>
</MOVEMAP_BUILDERS>
<MOVERS>
<SavePoseMover name="spm" restore_pose="0" reference_name="store_native"/>
<Transform name="transform" chain="X" box_size="20.0" move_distance="10" angle="360" initial_perturb="2" cycles="500" repeats="5" temperature="1000"/>
<HighResDocker name="high_res_docker" cycles="9" repack_every_Nth="3" scorefxn="fa_standard" movemap_builder="docking"/>
<FinalMinimizer name="final" scorefxn="fa_standard" movemap_builder="final"/>
</MOVERS>
<FILTERS>
<LigInterfaceEnergy name="interfE" scorefxn="fa_standard" energy_cutoff="0.0" confidence="0"/>
<SimpleMetricFilter name="rmsd_chX" metric="rmsd_chX" cutoff="999999." comparison_type="lt" confidence="0"/>
</FILTERS>
<PROTOCOLS>
<Add mover="spm"/>
<Add mover="transform"/>
<Add mover="high_res_docker"/>
<Add mover="final"/>
<Add filter="interfE"/>
<Add filter="rmsd_chX"/>
</PROTOCOLS>
</ROSETTASCRIPTS>
""").get_mover("ParsedProtocol")
使用PyJobDistributor生成5个全局配体对接轨迹:
if not os.getenv("DEBUG"):
working_dir = os.getcwd()
output_dir = "outputs"
if not os.path.exists(output_dir):
os.mkdir(output_dir)
os.chdir(output_dir)
jd = pyrosetta.toolbox.py_jobdistributor.PyJobDistributor(pdb_name="test_lig_XMLObjects",
nstruct=5,
scorefxn=scorefxn)
jd.native_pose = pose
df = pd.DataFrame()
while not jd.job_complete:
test_pose = pose.clone()
xml.apply(test_pose)
test_df = pd.DataFrame.from_records(dict(test_pose.scores), index=[jd.current_name])
df = df.append(test_df)
jd.output_decoy(test_pose)
os.chdir(working_dir)
现在我们已经对一些全球配体对接轨迹进行了采样,让我们绘制配体结合能图:
if not os.getenv("DEBUG"):
matplotlib.rcParams['figure.figsize'] = [12.0, 8.0]
seaborn.scatterplot(x="rmsd_chX", y="interfE", data=df)
让我们看看生成的interfE值最低的姿势:
df.sort_values(by="interfE")
#Skip for tests
if not os.getenv("DEBUG"):
lowest_energy_pdb_filename = os.path.join("expected_outputs", df.sort_values(by="interfE").head(1).index[0])
test_pose = pyrosetta.io.pose_from_file(filename=lowest_energy_pdb_filename)
chE = pyrosetta.rosetta.core.select.residue_selector.ChainSelector("E")
view = viewer.init(test_pose)
view.add(viewer.setStyle())
view.add(viewer.setStyle(command=({"hetflag": True}, {"stick": {"colorscheme": "brownCarbon", "radius": 0.2}})))
view.add(viewer.setSurface(residue_selector=chE, opacity=0.7, color='white'))
view.add(viewer.setHydrogenBonds())
view()
GALigandDock Protocol with pyrosetta.distributed Using the beta_cart.wts Scorefunction
import logging
logging.basicConfig(level=logging.INFO)
import matplotlib
%matplotlib inline
import os
import pandas as pd
import pyrosetta
import pyrosetta.distributed
import pyrosetta.distributed.io as io
import pyrosetta.distributed.viewer as viewer
import pyrosetta.distributed.packed_pose as packed_pose
import pyrosetta.distributed.tasks.rosetta_scripts as rosetta_scripts
import seaborn
seaborn.set()
import sys
使用-beta_cart标志时加载TPA.am1-bcc.gp.params文件,该标志具有生成电位原子类型和am1-bcc部分电荷:
pdb_filename = "inputs/test_lig.pdb"
ligand_params = "inputs/TPA.am1-bcc.gp.params"
flags = f"""
-ignore_unrecognized_res 1
-extra_res_fa {ligand_params}
-beta_cart
-out:level 200
"""
pyrosetta.distributed.init(flags)
pose_obj = io.pose_from_file(filename=pdb_filename)
现在,我们将RosettaScripts脚本中的记分函数更改为beta_cart.wts,使用Ambe带有AM1-BCC部分电荷的配体在蛋白质-配体复合物上优化了其权重。
如果通过命令行运行,RosettaScripts中的GALigandDock通常会将多个.pdb文件输出到磁盘。然而,当在pyrossetta.distributed中使用MultioutputRosettaScriptsTask函数时,输出将在这个Jupyter会话中捕获到内存中!
xml = f"""
<ROSETTASCRIPTS>
<SCOREFXNS>
<ScoreFunction name="fa_standard" weights="beta_cart.wts"/>
</SCOREFXNS>
<MOVERS>
<GALigandDock name="dock"
scorefxn="fa_standard"
scorefxn_relax="fa_standard"
grid_step="0.25"
padding="5.0"
hashsize="8.0"
subhash="3"
nativepdb="{pdb_filename}"
final_exact_minimize="sc"
random_oversample="10"
rotprob="0.9"
rotEcut="100"
sidechains="auto"
initial_pool="{pdb_filename}">
<Stage repeats="10" npool="50" pmut="0.2" smoothing="0.375" rmsdthreshold="2.5" maxiter="50" pack_cycles="100" ramp_schedule="0.1,1.0"/>
<Stage repeats="10" npool="50" pmut="0.2" smoothing="0.375" rmsdthreshold="1.5" maxiter="50" pack_cycles="100" ramp_schedule="0.1,1.0"/>
</GALigandDock>
</MOVERS>
<PROTOCOLS>
<Add mover="dock"/>
</PROTOCOLS>
</ROSETTASCRIPTS>
"""
xml_obj = rosetta_scripts.MultioutputRosettaScriptsTask(xml)
xml_obj.setup()
MultioutputRosettaScriptsTask是一个python生成器对象。因此,我们需要对其调用list()或set()来运行它。以下单元将运行约45分钟的CPU时间。
if not os.getenv("DEBUG"):
%time results = list(xml_obj(pose_obj))
Inspect the scores for the GALigandDock trajectories:
if not os.getenv("DEBUG"):
df = pd.DataFrame.from_records(packed_pose.to_dict(results))
print(df)
Now that we have performed GALigandDock, we can plot the ligand binding energy landscape:
if not os.getenv("DEBUG"):
matplotlib.rcParams["figure.figsize"] = [12.0, 8.0]
seaborn.scatterplot(x="lig_rms", y="total_score", data=df)
if not os.getenv("DEBUG"):
ppose_lowest_total_score = results[df.sort_values(by="total_score").index[0]]
view = viewer.init(ppose_lowest_total_score)
view.add(viewer.setStyle())
view.add(viewer.setStyle(command=({"hetflag": True}, {"stick": {"colorscheme": "brownCarbon", "radius": 0.2}})))
view.add(viewer.setSurface(residue_selector=pyrosetta.rosetta.core.select.residue_selector.ChainSelector("E"), opacity=0.7, color='white'))
view.add(viewer.setHydrogenBonds())
view()














网友评论