python15

作者: rong酱 | 来源:发表于2021-08-17 12:46 被阅读0次
# -*- coding: utf-8 -*-
#!/usr/bin/env python

import argparse
import os
import shlex
import sys

# Command-line interface: a DESeq2 result table and a gene matrix as inputs,
# plus two output paths (an intermediate selected-gene table and the final
# filtered matrix). All four options are mandatory.
parser = argparse.ArgumentParser(description="trim data , change geneid to genename")
parser.add_argument(
    '--DEseq2_file',
    help="DEseq2.xls.filter, include sample different geneID and genename ",
    required=True,
)
parser.add_argument(
    '--diffmatrix',
    help="input tab-separated matrix: a header row, then one row per gene "
         "with the gene ID in column 1",
    required=True,
)
parser.add_argument(
    '--top20con',
    help="output path for the intermediate table of selected genes "
         "(columns 1-2 of --DEseq2_file)",
    required=True,
)
parser.add_argument(
    '--trimfile',
    help="output path for the rows of --diffmatrix that match the selected "
         "genes, with column 1 replaced by the gene name",
    required=True,
)
argv = vars(parser.parse_args())
DEseq2file = argv['DEseq2_file']  # table the selected genes are taken from
difffile = argv['diffmatrix']  # matrix that gets filtered
top20file = argv['top20con']  # intermediate file: gene ID and gene name of the selected genes
outfile = argv['trimfile']  # final filtered matrix

# Select genes from the DESeq2 table with a shell pipeline: sort in reverse on
# the key that starts at field 7 (dropping lines with a duplicate key), keep
# the first 21 lines, and write columns 1-2 of those lines to top20file.
# Per the original author's note, field 7 is padj and columns 1-2 are the gene
# ID and gene name; this script cannot verify that layout.
# NOTE(review): `sort` is called without -n/-g, so the comparison is textual,
# not numeric -- confirm this ordering is the intended one.
# Both paths are shell-quoted so spaces or metacharacters in them cannot break
# or alter the command.
select_cmd = 'cat %s | sort -k 7 -u -r | head -n 21 | cut -f 1,2 >%s' % (
    shlex.quote(DEseq2file),
    shlex.quote(top20file),
)
if os.system(select_cmd) != 0:
    # Stop here instead of going on to read a missing or stale top20file.
    sys.exit("gene selection command failed: %s" % select_cmd)

# Load the selected genes into a dict mapping gene ID -> gene name.
top20 = {}
with open(top20file, "r") as top_handle:
    # The first line is discarded (presumably the table header, which the
    # reverse sort is expected to place first -- confirm).
    next(top_handle, None)
    for top_line in top_handle:
        fields = top_line.strip().split("\t")
        print(fields)
        if len(fields) < 2:
            # Blank line or missing gene name: there is nothing to map to, so
            # skip it rather than fail with an IndexError.
            sys.stderr.write("skipping line without gene name: %r\n" % top_line)
            continue
        gene_id, gene_name = fields[0], fields[1]
        # IDs containing 'ENS' are keyed as "<id>_<name>".
        if 'ENS' in gene_id:
            gene_id = gene_id + "_" + gene_name
        top20[gene_id] = gene_name

# Copy the matrix rows whose gene ID (column 1) is one of the selected genes
# to the output file, replacing the ID with the gene name. The header is
# rewritten with 'geneid' as its first column.
# Every output line ends with a tab before the newline; that layout is kept
# so the file content stays byte-identical to what this script produced before.
with open(difffile, "r") as matrix, open(outfile, "w") as outcontent:
    header_line = matrix.readline()
    if not header_line:
        sys.exit("diffmatrix file is empty: %s" % difffile)
    column_names = header_line.strip().split('\t')[1:]
    outcontent.write("\t".join(['geneid'] + column_names) + "\t\n")
    for row in matrix:
        cells = row.strip().split('\t')
        gene_name = top20.get(cells[0])
        if gene_name is None:
            # Not one of the selected genes.
            continue
        outcontent.write("\t".join([gene_name] + cells[1:]) + "\t\n")

相关文章

网友评论

      本文标题:python15

      本文链接:https://www.haomeiwen.com/subject/syabbltx.html