美文网首页
emmax跑GWAS

emmax跑GWAS

作者: 花生学生信 | 来源:发表于2023-07-26 16:04 被阅读0次
##提取样本名
cut -f 1 GJnew.SE.SNP..txt > Geng
sed -i '1d' Geng  #  删除文件第一列的表头

###提取vcf子集 
bcftools view -S Geng  ../pass0.05.recode.vcf > Geng.vcf

###vcf转ped/map
plink --vcf Geng.vcf --allow-extra-chr --recode --out sevcf


###转bed
plink --file sevcf --make-bed --out sevcf --allow-extra-chr

###bed转tped/tfam
plink --bfile sevcf --recode12 --output-missing-genotype 0 --transpose --out sevcf --allow-extra-chr

###创建基于标记的亲缘关系矩阵,使用emmax-kin创建亲缘关系矩阵(首选BN),确保.tped、tfam文件存在相同前缀
~/tools/emmax-kin-intel64 sevcf -v -d 10 -o emmax_in_BN.kinf

###admixture
##for K in 1 2 3 4 5 6 7 8 9 10;do admixture --cv sevcf.bed $K | tee log${K}.out;done

##~/tools/emmax-intel64 -t sevcf -o 1.k -p tra -k emmax_in_BN.kinf 

##~/tools/emmax-intel64 -t sevcf -o tl.qk -p tra -k emmax_in_BN.kinf -c emmax.cov.txt 

表型提取

tfam <- read.table("sevcf.tfam", header = F, stringsAsFactors = F)
tr <- read.table("GJnew.SE.SNP..txt", header = T, check.names = F, stringsAsFactors = F)
head(tr)
#tr <- tr[match(tfam$V1, tr$'<Trait>'),]
tr <- tr[match(tfam$V1, tr$CX),]

#tr[tr == -999] <- NA
tre <- cbind(tr[,1], tr)
for(i in 3:ncol(tre)){
  file_name <- paste0("trait/emmax.GJ.", names(tre)[i], ".txt")
 write.table(tre[, c(1, 2, i)], file = file_name, col.names = F, row.names = F, sep = "\t", quote = F)

}
###将表型读取 ls *.txt >../id
##运行
cat id|while read id
do

~/tools/emmax-intel64 -t sevcf -o Gjout/$id -p trait/$id -k emmax_in_BN.kinf 

done

整理成map格式

#####sevcf.map文件和emmax.ps文件整合
###map文件第一列为chr,第二列为ID,第四列为位置
###ps文件第四列为p
###ID   chr position    p

#!/usr/bin/perl -w
use strict;

=pod
    Description: combine table
    Author: Ft
    Created:2023-7-26 
    Version: v1.1
=cut

#use Getopt::Long;
#use Bio::SeqIO;

my $svmap=$ARGV[0];
my $ps=$ARGV[1];

my ($ID,$chr,$position,$p,$out);

my %ps=();
open ps,"<$ps" or die $!;
while(<ps>){
    chomp;
    my @map=split/\t/;  
    $ps{$map[0]}=$map[3];

}
close ps;

open OUTFILE ,">$ARGV[2]"; 
print OUTFILE "ID\tchr\tposition\tp\n";

open svmap, "<$svmap" or die $!;
while(<svmap>){
    chomp;
    my @map=split/\t/;
    $chr=$map[0],$ID=$map[1],$position=$map[3];
    if(exists $ps{$ID}){
    print OUTFILE "$ID\t$chr\t$position\t$ps{$ID}\n";
}       
}
close svmap;
close OUT;

画图

###
args<-commandArgs(TRUE)
library("CMplot")
pmap <- read.table(args[1], header = T)
#head(pmap)
# 阈值计算
threshold <- 1/nrow(pmap[!is.na(pmap$position),])
# 画图
#CMplot(pmap, threshold = threshold, amplify = F, memo = "", file = "tiff", plot.type=c("m","q"))
CMplot(pmap, threshold = threshold, amplify = F,file = "jpg",file.name=args[2],file.output=T,plot.type=c("m","q"))
结果图
计算有效snp
java -jar ~/tools/gec/gec.jar --effect-number --plink-binary sevcf --genome --out gec
#结果在.sum文件中。有效标记 和建议阈值。建议阈值用 -log10()算LOD值。

相关文章

网友评论

      本文标题:emmax跑GWAS

      本文链接:https://www.haomeiwen.com/subject/mwukpdtx.html