将 xlsx 表1 中的每个基因与表2 中的每个基因两两配对,逐对运行一次对接(hdock)。
原table
读取 WRKY_VQ.xlsx 里的第一个工作表,将第一列加上 ">" 作为 FASTA 的序列名,第二列作为 FASTA 文件的序列
import pandas as pd
def excel_to_fasta(excel_file, fasta_file):
    """Convert the first sheet of an Excel workbook into a FASTA file.

    For every data row, the first column becomes the record name (written
    after a leading ``>``) and the second column becomes the sequence line.
    Columns are selected by position, so the header text of the sheet does
    not matter. Rows whose name or sequence cell is empty are skipped so
    that no ``>nan`` records end up in the output.

    ``pd.read_excel`` is called with its default header handling, i.e. the
    first row of the sheet is consumed as column headers and is not written
    out as a record.

    Errors are reported on stdout instead of being raised.

    Args:
        excel_file: Path of the workbook to read.
        fasta_file: Path of the FASTA file to create (overwritten if it
            already exists).

    Returns:
        None.
    """
    try:
        df = pd.read_excel(excel_file, sheet_name=0)
        with open(fasta_file, 'w', encoding='utf-8') as f:
            # Plain tuples give true positional access; indexing a
            # label-indexed row Series with 0/1 relies on a deprecated
            # positional fallback in pandas.
            for row in df.itertuples(index=False, name=None):
                name, sequence = row[0], row[1]
                if pd.isna(name) or pd.isna(sequence):
                    # An empty cell would otherwise be written as "nan".
                    continue
                f.write(f">{name}\n{sequence}\n")
        print(f"成功将 {excel_file} 转换为 {fasta_file}")
    except FileNotFoundError:
        print(f"错误: 文件 {excel_file} 未找到!")
    except Exception as e:
        # Deliberate best-effort: report any other failure (bad workbook,
        # fewer than two columns, unwritable output path) and carry on.
        print(f"错误: 发生了一个未知错误: {e}")
if __name__ == "__main__":
    # Script entry point: convert the first sheet of the workbook to FASTA.
    excel_file, fasta_file = 'WRKY_VQ.xlsx', 'output.fasta'
    excel_to_fasta(excel_file, fasta_file)
OmegaFold 结构预测:
# Run OmegaFold once per FASTA file. The later hdock step reads
# VQ/<id>.pdb and WRKY/<id>.pdb, so the second argument is presumably the
# output directory for the predicted structures -- confirm against the
# OmegaFold usage notes.
omegafold VQ.fasta VQ
omegafold WRKY.fasta WRKY
批量生成 hdock 对接命令并并行运行:
# Build one hdock command per (WRKY id, VQ id) pair and run them in parallel.
#
# The job list is written with a single '>' redirect, so work1.sh is rebuilt
# from scratch on every run; appending with '>>' would queue every pair again
# each time this snippet is re-executed.
# 'read -r' keeps backslashes literal; the '|| [ -n ... ]' clause also picks up
# a final line that has no trailing newline; blank lines are skipped.
while read -r id1 || [ -n "$id1" ]
do
    [ -n "$id1" ] || continue
    while read -r id2 || [ -n "$id2" ]
    do
        [ -n "$id2" ] || continue
        printf 'hdock WRKY/%s.pdb VQ/%s.pdb -out %svs%s.out\n' "$id1" "$id2" "$id1" "$id2"
    done < vqid
done < wrkyid > work1.sh

# Up to 100 jobs at a time; nohup keeps the run alive after logout and, when
# started from a terminal, collects the output in nohup.out.
nohup parallel -j 100 < work1.sh &
结果提取(从每个 .out 文件中提取数值):
import os
import csv
def extract_value(file_path):
    """Return the 7th whitespace-separated field of the 6th line of a file.

    The file is streamed and reading stops at the sixth line, so large
    result files are not loaded into memory just to look at one line.
    NOTE(review): for the hdock ``.out`` files this is presumably the
    docking score of the first listed solution -- confirm against the
    HDOCK output format.

    Args:
        file_path: Path of the text file to inspect.

    Returns:
        The field as a string, or ``None`` when the file has fewer than six
        lines, the sixth line has fewer than seven fields, or the file
        cannot be read (an error message is printed in that case).
    """
    try:
        with open(file_path, 'r') as file:
            for line_number, line in enumerate(file, start=1):
                if line_number < 6:
                    continue
                fields = line.split()
                if len(fields) >= 7:
                    return fields[6]
                # Sixth line reached but too short: nothing to extract.
                break
    except Exception as e:
        # Best-effort: one unreadable file must not abort a batch run.
        print(f"处理文件 {file_path} 时出错: {e}")
    return None
def main():
    """Collect one value from every ``.out`` file in the working directory.

    Each file in the current working directory whose name ends with
    ``.out`` is passed to ``extract_value``; files that yield a value are
    written to ``output.csv`` (in the working directory) as
    ``[file name, value]`` rows below a header row.

    Files are processed in sorted name order so the CSV is reproducible
    between runs (``os.listdir`` returns entries in arbitrary order).
    ``nohup.out`` is skipped: it is the log that ``nohup`` creates next to
    the results, not a result file, yet it matches the ``.out`` suffix.
    """
    current_dir = os.getcwd()
    csv_data = []
    for filename in sorted(os.listdir(current_dir)):
        if not filename.endswith('.out') or filename == 'nohup.out':
            continue
        value = extract_value(os.path.join(current_dir, filename))
        if value is not None:
            csv_data.append([filename, value])

    # The header row is non-ASCII, so pin the encoding instead of relying
    # on the platform default.
    with open('output.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['文件名', '提取的数值'])
        writer.writerows(csv_data)
# Run the extraction only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()
```










网友评论