美文网首页
2022-03-10

2022-03-10

作者: 球果假水晶蓝 | 来源:发表于2022-03-10 19:21 被阅读0次
import time
from Bio import SeqIO
# Method 1: hand-rolled FASTA parser (the author's original note: "10 seconds and up").
# Reads the whole file into a dict mapping each header line (including the
# leading '>') to its concatenated sequence, then indexes one base.
a = time.time()
with open(r'/share/home/stu_zhangyixing/workspace/python0307/split/Chr01A.fasta') as f:
    chunks = {}
    header = None  # kept separate from the timer variable `a`
    # Iterate over the lines of the file; `f.readline()` would yield only the
    # characters of the first line.
    for line in f:
        line = line.strip()
        if line.startswith('>'):
            header = line
            chunks[header] = []
        elif header is not None and line:
            # Collect pieces and join once: `dict[key] += str` copies the
            # whole accumulated sequence on every line.
            chunks[header].append(line)

d_fasta = {name: ''.join(parts) for name, parts in chunks.items()}

print(d_fasta['>Chr01A'][500000])
b = time.time()
print(f'简单重复写法取基因组中第500000个碱基花费时间{b - a}')

# Method 2: Biopython SeqIO (the author's recorded timing: 1.2899494171142578).
# Parses every record into a dict keyed by record id, then indexes one base.
a = time.time()
chr_dict = {}
for seq_record in SeqIO.parse(r'/share/home/stu_zhangyixing/workspace/python0307/split/Chr01A.fasta', "fasta"):
    chr_dict[str(seq_record.id)] = seq_record.seq
# SeqRecord.id does not include the leading '>' of the FASTA header line,
# so the key is 'Chr01A', not '>Chr01A'.
print(chr_dict['Chr01A'][500000])
b = time.time()
print(b - a)

# Method 3: pyfaidx indexed access (the author's recorded timing: 0.010302305221557617).
from pyfaidx import Fasta
a = time.time()
genes = Fasta(r'/share/home/stu_zhangyixing/workspace/python0307/split/Chr01A.fasta')
# Half-open slice of length 1, so it returns the same single base (index
# 500000) as the other methods. The original upper bound 5000001 had an
# extra digit and pulled ~4.5 Mb instead of one base.
base1 = genes['Chr01A'][500000:500001].seq
print(base1)
b = time.time()
print(b - a)

# Method 4: shell out to samtools faidx.
import os
import subprocess
# The author's recorded timing: 0.7264723777770996
a = time.time()
fasta_path = '/share/home/stu_zhangyixing/workspace/python0307/split/Chr01A.fasta'
# First call builds the .fai index next to the FASTA file. check=True makes a
# failed samtools run raise instead of silently timing a failure.
subprocess.run(['samtools', 'faidx', fasta_path], check=True)
# Query the same absolute path that was just indexed (the original used a
# relative 'Chr01A.fasta', which only resolves from the data directory).
# samtools regions are 1-based and inclusive, so 0-based index 500000 used by
# the other methods corresponds to position 500001 here.
subprocess.run(['samtools', 'faidx', fasta_path, 'Chr01A:500001-500001'], check=True)
b = time.time()
print(b - a)


相关文章

网友评论

      本文标题:2022-03-10

      本文链接:https://www.haomeiwen.com/subject/bphqdrtx.html