生信技能树推文:
表观调控13张图之四,peaks区域注释分类比例
参考上面的推文,先获得不同样本的 peak 注释结果。
本文主要介绍绘图过程,以及如何在图中对部分区块添加数字标注,并让数字在区块内居中显示。
Promoter 5'UTR Exon Intron 3'UTR Intergenic
Cg_WT 12250 32 99 3291 148 1999
Ez_WT 7057 15 63 1027 79 1113
Pc_WT 3871 12 69 763 37 1110
Ph_WT 10558 34 92 2819 67 1809
Pho_WT 6776 9 55 1436 40 1343
Psc_WT 10085 19 79 2255 46 1669
Spps_WT 3691 5 18 507 11 410
1. 加载 R 包,构建 df0(peak 数目信息)
library(tidyverse)
library(reshape2)
library(ggpubr)
# 1. df0: peak counts in long format (one row per sample / annotation class)
# The table is read from the system clipboard: one row per sample, one column
# per annotation class. The "clipboard" connection is platform dependent; on
# macOS use pipe("pbpaste") instead.
# check.names = FALSE keeps headers such as 5'UTR exactly as written.
tmp <- read.table(file = "clipboard", header = TRUE, check.names = FALSE)
# as_tibble() drops row names, so the sample names are stored in `key` first;
# every remaining column is then stacked into `name` (annotation class) and
# `count` (number of peaks). Resulting columns: key, name, count.
df0 <- as_tibble(tmp) %>%
  mutate(key = row.names(tmp)) %>%
  gather(key = "name", value = "count", -key)
> df0
# A tibble: 42 x 3
key name count
<chr> <chr> <int>
1 Cg_WT Promoter 12250
2 Ez_WT Promoter 7057
3 Pc_WT Promoter 3871
4 Ph_WT Promoter 10558
5 Pho_WT Promoter 6776
6 Psc_WT Promoter 10085
7 Spps_WT Promoter 3691
8 Cg_WT 5'UTR 32
9 Ez_WT 5'UTR 15
10 Pc_WT 5'UTR 12
# ... with 32 more rows
2.df2 注释所占比例
## 2.df2 注释所占比例
# Midpoint of every segment of a stack, given the stack's running totals.
#
# T1: numeric vector of cumulative sums (the far edge of each segment).
# Returns a numeric vector of the same length; element i is the mean of
# T1[i] and the preceding cumulative value (0 for the first element).
mid <- function(T1) {
  # Near edge of each segment: 0, then every cumulative value except the last.
  # head(T1, -1) replaces T1[1:length(T1)-1], which parsed as
  # T1[(1:length(T1)) - 1] and only worked because index 0 is silently dropped.
  lower <- c(0, head(T1, -1))
  (T1 + lower) / 2
}
# Quick check of mid(): the result is 0.5, 2, 6.
mid(c(1, 3, 9))
# Share of each sample's peaks that falls into every annotation class.
# apply() over rows returns the result transposed: rows of df1 are annotation
# classes, columns are samples.
df1 <- apply(tmp, 1, function(x) {
  x / sum(x)
})
# Long format: one row per (annotation class, sample) pair with its share.
df2 <- as_tibble(df1) %>%
  mutate(name = rownames(df1)) %>%
  tidyr::gather(key = "key", value = "value", -name)
# Per sample: `cumsum` is the cumulative share counted from the last class
# upwards, and `mid` is the centre of each class's segment on that scale
# (used later as the y position of the labels).
# Inside mutate(), cumsum(...) and mid(...) still call the functions, while a
# bare `cumsum` refers to the column created just before it.
# The result is left grouped by `key`.
df2 <- df2 %>%
  group_by(key) %>%
  mutate(
    cumsum = rev(cumsum(rev(value))),
    mid = rev(mid(rev(cumsum)))
  )
> df2
# A tibble: 42 x 5
# Groups: key [7]
name key value cumsum mid
<fct> <chr> <dbl> <dbl> <dbl>
1 Promoter Cg_WT 0.687 1 0.656
2 5'UTR Cg_WT 0.00180 0.313 0.312
3 Exon Cg_WT 0.00556 0.311 0.308
4 Intron Cg_WT 0.185 0.305 0.213
5 3'UTR Cg_WT 0.00831 0.120 0.116
6 Intergenic Cg_WT 0.112 0.112 0.0561
7 Promoter Ez_WT 0.754 1 0.623
8 5'UTR Ez_WT 0.00160 0.246 0.245
9 Exon Ez_WT 0.00674 0.244 0.241
10 Intron Ez_WT 0.110 0.237 0.182
3.将两部分数据合并
# 3. Combine the peak counts (df0) with the shares and label positions (df2)
# The join keys are spelled out: sample (`key`) and annotation class (`name`).
df <- merge(df0, df2, by = c("key", "name"))
# Fix the order of the annotation classes; this controls the stacking and
# legend order in the plot.
df$name <- factor(
  df$name,
  levels = c("Promoter", "5'UTR", "Exon", "Intron", "3'UTR", "Intergenic")
)
> head(df)
key name count value cumsum mid
1 Cg_WT 3'UTR 148 0.008305741 0.1204894 0.11633649
2 Cg_WT 5'UTR 32 0.001795836 0.3125316 0.31163365
3 Cg_WT Exon 99 0.005555867 0.3107357 0.30795780
4 Cg_WT Intergenic 1999 0.112183624 0.1121836 0.05609181
5 Cg_WT Intron 3291 0.184690499 0.3051799 0.21283461
6 Cg_WT Promoter 12250 0.687468433 1.0000000 0.65626578
4.画图效果
# 4. Stacked bar chart of annotation shares with centred count labels
ggplot(df) +
  # geom_col() is the stat = "identity" form of geom_bar(): bar heights are
  # taken directly from `value`.
  geom_col(aes(key, value, fill = name), position = "stack") +
  # Only segments covering more than 8% of a bar get a label; the label shows
  # the raw peak count and is placed at the segment centre stored in `mid`.
  geom_text(
    data = dplyr::filter(df, value > 0.08),
    aes(x = key, y = mid, label = count)
  )

Tips :
如果不做数字标注,这张图很快就能画出来;添加标注时,则需要考虑标注的位置,以及堆叠顺序是否颠倒。为了让数字在各区块内居中显示,上面用到了自定义的 mid
函数。
网友评论