美文网首页
单细胞Scanpy常用脚本2023-08-28

单细胞Scanpy常用脚本2023-08-28

作者: 黄甫一 | 来源:发表于2023-08-27 10:32 被阅读0次

前言

Scanpy是基于Python语言的类似Seurat的单细胞转录组分析工具,本文汇集了Scanpy的常规操作。

  • 加载模块和设置

import time
print('Start time: ' + time.strftime('%Y-%m-%d %H:%M:%S',time.localtime()))

import numpy as np
import pandas as pd
import scanpy as sc
import louvain

import matplotlib
matplotlib.use('AGG')
import matplotlib.pyplot as plt
from matplotlib import rcParams

sc.settings.verbosity = 3
sc.settings.autosave = True
results_file = './write/81libs.h5ad'
sc.settings.autosave = True
sc.settings.set_figure_params(dpi=300, frameon=False)
  • 读入矩阵

inpath= 'Matrix/scanpy_mat'
adata2 = sc.read_10x_mtx(path, var_names='gene_symbols', cache=True)
adata2.obs['batch1'] = 'C7-4'

adata= sc.read_10x_h5("./matrix.h5", genome=None, gex_only=True)
adata = sc.read_csv('./matrix.tsv',delimiter='\t')
adata = sc.read_h5ad('./result.h5ad')
adata = sc.read('./result.h5ad')
  • 合并多个对象

#load data
adata1 = sc.read("30libs.h5ad")
adata2 = sc.read("22libs.h5ad")
adata3 = sc.read("28libs.h5ad")

nadata1 = adata1.raw.to_adata()
nadata1.obs['batch1'] = adata1.obs['batch']
nadata2 = adata2.raw.to_adata()
nadata2.obs['batch1'] = adata2.obs['batch']
nadata3 = adata3.raw.to_adata()
nadata3.obs['batch1'] = adata3.obs['batch']

datalist = [nadata2, nadata3]
nadata1 = nadata1[nadata1.obs.batch != "C7-4" , :]

path= mat+'C7-4'+'/04.Matrix/scanpy_mat'
datalist.append(sc.read_10x_mtx(path, var_names='gene_symbols', cache=True))
datalist[2].obs['batch1'] = 'C7-4'


path= mat+'C5-2'+'/04.Matrix/scanpy_mat'
datalist.append(sc.read_10x_mtx(path, var_names='gene_symbols', cache=True))
datalist[3].obs['batch1'] = 'C5-2'

adata = nadata1.concatenate(datalist, join='outer')
  • 添加注释

adata.obs['lable'] = "NA"
c=[ i for i, word in enumerate(list(adata.obs['batch1'])) if word.startswith('A') ]
adata.obs['lable'][c]= "A"
c=[ i for i, word in enumerate(list(adata.obs['batch1'])) if word.startswith('B') ]
adata.obs['lable'][c]= "B"
c=[ i for i, word in enumerate(list(adata.obs['batch1'])) if word.startswith('C') ]
adata.obs['lable'][c]= "C"
c=[ i for i, word in enumerate(list(adata.obs['batch1'])) if word.startswith('Y') ]
adata.obs['lable'][c]= "Y"
  • 输出矩阵

#save raw matrix
pd.DataFrame(data=adata.X.todense().T, index=adata.var_names,columns=adata.obs_names).to_csv('raw_mat.csv',sep="\t",
#                                                                                             float_format='%.0f')
print(str(len(datalist)+1)+' datasets were merged!')
  • 质控

#quality control
sc.pp.filter_cells(adata, min_genes=500)
sc.pp.filter_genes(adata, min_cells=3)

adata.var['mt'] = adata.var_names.str.startswith('MT-')
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

adata.obs['percent_mito'] = adata.obs['pct_counts_mt']
adata.obs['n_counts'] = adata.obs['total_counts']

adata = adata[adata.obs.pct_counts_mt < 10, :]
sc.pl.violin(adata, ['n_genes', 'n_counts', 'percent_mito'],jitter=0.4, multi_panel=True, save='_qc.pdf', stripplot=False)
plt.savefig('./figures/01.22libs_qc.pdf')

sc.pp.calculate_qc_metrics(data, percent_top=None, log1p=False, inplace=True)
sc.pl.violin(data, ['n_genes_by_counts', 'total_counts'],jitter=False, multi_panel=True,stripplot=False)
plt.savefig("QC_violin.pdf")
data = data[data.obs.n_genes < 2500, :]
print(data.obs['n_genes_by_counts'].median(),'\n')
print(data.obs['total_counts'].median(),"\n")
  • 常规聚类

#normalize scale pca umap
sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e4)
sc.pp.log1p(adata)
adata.raw = adata

sc.pp.highly_variable_genes(adata, n_top_genes=3000)
#sc.pl.highly_variable_genes(adata)
adata = adata[:, adata.var['highly_variable']]
sc.pp.regress_out(adata, ['n_counts', 'percent_mito'])
sc.pp.scale(adata, max_value=10)

sc.tl.pca(adata, n_comps=50, random_state=int(2020), svd_solver='arpack')
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40, random_state=int(2020))
sc.tl.umap(adata, random_state=int(2020))

#cluster
sc.tl.louvain(adata, random_state=int(2020))
sc.pl.umap(adata, color=['louvain'], save='_cluster1.pdf')
sc.pl.umap(adata, color=['lable'], save='_cluster2.pdf')
#plt.savefig('./figures/02.22libs_cluster.pdf')

adata.obs['clusters'] = adata.obs['louvain']
  • 聚类点图

data.obsm['X_umap']=data.obsm['spatial']
sc.pl.umap(data, color=['leiden'])
plt.savefig("Umap2.pdf")
  • 基因表达点图

#plot markers
rcParams['figure.figsize'] = 3,3
marker = ["CD68","CD14","CD163","CD3G","CD4","CD8A"]
marker = [gene for gene in marker if (gene in adata.raw.var._stat_axis.values.tolist())]
sc.pl.umap(adata, color=marker, s=50, frameon=False, ncols=3, vmax='p99', save='_l1.pdf')

rcParams['figure.figsize'] = 3,3
marker = ["RTKN2","CHN2","LRRD1","KIAA0408","CCDC144A"]
marker = [gene for gene in marker if (gene in adata.raw.var._stat_axis.values.tolist())]
sc.pl.umap(adata, color=marker, s=50, frameon=False, ncols=3, vmax='p99', save='_l2.pdf')
  • 保存结果

adata.write(results_file)
adata.obs.to_csv('./write/Scanpy_cell_info.csv', sep='\t', header=True, index=True)

adata.write(results_file, compression='gzip')
  • 找DEG

# Finding marker genes
sc.settings.verbosity = 2
sc.tl.rank_genes_groups(adata, 'louvain', method='wilcoxon')
sc.pl.rank_genes_groups_heatmap(adata, n_genes=10, swap_axes=True, show_gene_labels=True, vmin=-3, vmax=3, cmap='bwr')

result = adata.uns['rank_genes_groups']
groups = result['names'].dtype.names
trgpc = pd.DataFrame({group + '_' + key[:1]: result[key][group] for group in groups for key in ['names', 'scores', 'logfoldchanges','pvals_adj']}).head(20)
trgpc.to_csv(path_or_buf="./write/top20_genes_per_cluster.csv", sep='\t', header=True, index=True)
  • 更改基因名或细胞名

adata.var_names = new_gene_names
adata.obs_names='cell_'+adata.obs_names

相关文章

网友评论

      本文标题:单细胞Scanpy常用脚本2023-08-28

      本文链接:https://www.haomeiwen.com/subject/mefwmdtx.html