美文网首页
数据分析:iCluster analysis

数据分析:iCluster analysis

作者: 生信学习者2 | 来源:发表于2021-01-25 14:06 被阅读0次

介绍

iCluster分析是基于多组学数据的整合分类算法。更多知识分享请到 https://zouhua.top/

Rscript

#!/usr/bin/R
library(argparser)
library(dplyr)
library(tibble)
library(ggplot2)
library(data.table)
library(iCluster)

# Session-wide options.
# The former `rm(list = ls())` call was removed: it silently wipes the
# caller's workspace whenever this file is source()d from an interactive
# session, and it does nothing useful for a fresh Rscript run.
options(
  stringsAsFactors = FALSE,
  # Upper bound (in bytes) for globals exported by the `future` framework.
  future.globals.maxSize = 1000 * 1024^2
)

# Two group labels and two hex colours; presumably the cluster names
# ("S1"/"S2") and their plotting colours -- not referenced elsewhere in the
# visible script, kept for downstream use.
grp <- c("S1", "S2")
grp.col <- c("#6C326C", "#77A2D1")

# parameter input
# argparser::add_argument() expects the long option name as `arg` and the
# one-letter alias through `short`. Passing both positionally (argparse
# style) binds the second string to `default` instead, so each option is
# registered here by its long name with an explicit short alias. The short
# flags used on the command line (-p -c -g -m -r -o) keep working.
parser <- arg_parser("iCluster function") %>%
  add_argument("--phen", short = "-p",
               help = "phenotype") %>%
  add_argument("--copyNumber", short = "-c",
               help = "copyNumber matrix") %>%
  add_argument("--geneExp", short = "-g",
               help = "gene Expression matrix") %>%
  add_argument("--methylation", short = "-m",
               help = "DNA methylation matrix") %>%
  add_argument("--RPPA", short = "-r",
               help = "protein_RPPA") %>%
  add_argument("--out", short = "-o",
               help = "result directory", default = "./")

args <- parse_args(parser)

# Fail early, with a readable message, when an input file was not supplied
# (otherwise fread() would fail later with a less helpful error).
required_inputs <- c("phen", "copyNumber", "geneExp", "methylation", "RPPA")
input_missing <- vapply(
  args[required_inputs],
  function(value) length(value) != 1 || is.na(value),
  logical(1)
)
if (any(input_missing)) {
  stop(
    "missing required argument(s): ",
    paste0("--", required_inputs[input_missing], collapse = ", "),
    call. = FALSE
  )
}

# prepare for function
# Values are read by full name with `[[` so nothing depends on the partial
# matching that `$` performs on lists.
phen <- fread(args[["phen"]])
copyNumber <- fread(args[["copyNumber"]])
geneExp <- fread(args[["geneExp"]])
methylation <- fread(args[["methylation"]])
protein_RPPA <- fread(args[["RPPA"]])
out <- args[["out"]]

#' Build a sample-by-feature matrix for one omics layer
#'
#' Keeps only the sample columns of `dataset` whose names also occur in
#' `metadata$Barcode`, transposes the table so that samples become rows, and
#' labels every feature column as `<V1 value>_<tag>`.
#'
#' @param dataset Table with a feature-identifier column named `V1` and one
#'   column per sample.
#' @param metadata Table with a `Barcode` column listing the sample ids to
#'   keep. (The previous implementation ignored this argument and always read
#'   the global `phen`.)
#' @param tag Suffix appended to every feature name.
#' @return A matrix with one row per shared sample (in `metadata$Barcode`
#'   order) and one column per feature.
get_profile <- function(dataset = copyNumber,
                        metadata = phen,
                        tag = "copyNumber") {
  # Plain data.frame semantics for `[`, whatever table class was passed in.
  dataset <- as.data.frame(dataset)
  if (!"V1" %in% colnames(dataset)) {
    stop("`dataset` must contain a feature column named 'V1'", call. = FALSE)
  }

  sid <- intersect(metadata$Barcode, colnames(dataset))
  if (length(sid) == 0) {
    stop(
      "no sample of `metadata$Barcode` found among the columns of the '",
      tag, "' dataset",
      call. = FALSE
    )
  }

  # drop = FALSE keeps the 2-d shape even when a single sample is shared.
  res <- t(as.matrix(dataset[, sid, drop = FALSE]))
  colnames(res) <- paste(dataset[["V1"]], tag, sep = "_")
  res
}

# One sample-by-feature matrix per omics layer; the list names double as the
# feature-name suffix passed to get_profile().
profile_tables <- list(
  copyNumber = copyNumber,
  geneExp = geneExp,
  methylation = methylation,
  protein_RPPA = protein_RPPA
)
datasets <- Map(
  function(tbl, label) get_profile(dataset = tbl, metadata = phen, tag = label),
  profile_tables,
  names(profile_tables)
)

# The cluster labels are later paired with the row names of a single matrix,
# so every matrix must hold the same samples in the same order. Each layer is
# intersected with the phenotype table on its own, hence align them here.
common_samples <- Reduce(intersect, lapply(datasets, rownames))
if (length(common_samples) == 0) {
  stop("no sample is shared by all data types", call. = FALSE)
}
datasets <- lapply(
  datasets,
  function(mat) mat[common_samples, , drop = FALSE]
)

print(names(datasets))


# icluster
# Integrative clustering into k = 2 groups with one lasso penalty per data
# type; the penalty vector follows the number of matrices instead of being
# hard-coded to four.
fit <- iCluster(
  datasets = datasets,
  k = 2,
  lambda = rep(0.2, length(datasets)),
  max.iter = 50,
  epsilon = 1e-3
)
#plotiCluster(fit=fit, label=rownames(datasets[[2]]))
#compute.pod(fit)

# Attach the cluster assignment to the phenotype table. Labels are paired
# with the sample ids taken from the row names of the second matrix, then
# rewritten as "S1", "S2", ...
cluster_assignment <- data.frame(
  SampleID = rownames(datasets[[2]]),
  Cluster = fit$clusters
)
phen_new_icluster <- inner_join(
  phen,
  cluster_assignment,
  by = c("Barcode" = "SampleID")
) %>%
  dplyr::select(Barcode, Cluster, everything()) %>%
  mutate(Cluster = paste0("S", Cluster))

# file.path() yields a valid path whether or not `out` ends with a slash
# (plain string concatenation glued the directory and file name together).
if (!dir.exists(out)) {
  dir.create(out, recursive = TRUE)
}
name1 <- file.path(out, "phenotype_cluster_iCluster.csv")
write.csv(phen_new_icluster, file = name1, row.names = FALSE)


# Second fit with iCluster2, one penalty entry per data type.
# NOTE(review): the argument names below are kept exactly as written in the
# original script -- confirm them against the installed iCluster version.
fit2 <- iCluster2(
  datasets = datasets,
  k = 2,
  lambda = as.list(rep(0.2, length(datasets))),
  max.iter = 50,
  verbose = TRUE
)

# Deliberately disabled export of the iCluster2 assignment (kept for
# reference). The output path is now built with file.path(); the previous
# paste() call inserted a space between directory and file name.
if (FALSE) {
  phen_new_icluster2 <- inner_join(
    phen,
    data.frame(SampleID = rownames(datasets[[2]]), Cluster = fit2$clusters),
    by = c("Barcode" = "SampleID")
  ) %>%
    dplyr::select(Barcode, Cluster, everything()) %>%
    mutate(Cluster = paste0("S", Cluster))

  name2 <- file.path(out, "phenotype_cluster_iCluster2.csv")
  write.csv(phen_new_icluster2, file = name2, row.names = FALSE)
}

# Both fitted models are stored in the current working directory (not in
# `out`), as in the original script.
save(fit, fit2, file = "iCluster_fit.RData")

Run

Rscript iCluster.R -p ../../Result/phenotype/common_survival_data.tsv \
 -c ../../Result/profile/copyNumber_filter.tsv \
 -g ../../Result/profile/geneExp_filter.tsv \
 -m ../../Result/profile/methylation_filter.tsv \
 -r ../../Result/profile/protein_RPPA_filter.tsv \
 -o ./

Reference

  1. iCluster package

参考文章如引起任何侵权问题,可以与我联系,谢谢。

相关文章

网友评论

      本文标题:数据分析:iCluster analysis

      本文链接:https://www.haomeiwen.com/subject/wpobzktx.html