Network inference and analysis of CLL data

Frédéric Bertrand and Myriam Maumy-Bertrand

Université de Strasbourg et CNRS
frederic.bertrand@math.unistra.fr

2019-08-24

Data preparation

Retrieve the full CLL dataset.

# Attach Patterns with library(): unlike require(), it stops right here when
# the package is missing instead of returning FALSE and failing further down.
library(Patterns)

# Remote .RData file; repmis::source_data() fetches it and makes the CLL
# object used below available in the session.
CLLfile <- "https://github.com/fbertran/Patterns/raw/master/add_data/CLL.RData"
repmis::source_data(CLLfile)

# Preview the first 10 rows and 5 columns of the loaded table.
CLL[1:10, 1:5]

Split the CLL dataset into healthy and aggressive, stimulated and unstimulated datasets.

# Columns are taken in runs of eight: positions 1-4 of each run go to the
# unstimulated (US) set, positions 5-8 (index modulo 8 equal to 5, 6, 7 or 0)
# to the stimulated (S) set.
# Healthy block: 48 columns, shifted by 2 to skip the first two columns of CLL.
hea_US <- CLL[, which(seq_len(48) %% 8 %in% 1:4) + 2]
hea_S <- CLL[, which(seq_len(48) %% 8 %in% c(0, 5:7)) + 2]

# Aggressive block: 40 columns, shifted by 98.
agg_US <- CLL[, which(seq_len(40) %% 8 %in% 1:4) + 98]
agg_S <- CLL[, which(seq_len(40) %% 8 %in% c(0, 5:7)) + 98]

# Wrap every subset as a micro_array. Second argument: the four time values
# 60, 90, 210 and 390; third argument: 6 for the healthy sets and 5 for the
# aggressive ones (presumably the number of subjects -- confirm against
# ?as.micro_array). Names come from column 1 of CLL, gene IDs from column 2.
m_hea_US <- as.micro_array(
  hea_US, c(60, 90, 210, 390), 6,
  name = CLL[, 1], gene_ID = CLL[, 2]
)
m_hea_S <- as.micro_array(
  hea_S, c(60, 90, 210, 390), 6,
  name = CLL[, 1], gene_ID = CLL[, 2]
)

m_agg_US <- as.micro_array(
  agg_US, c(60, 90, 210, 390), 5,
  name = CLL[, 1], gene_ID = CLL[, 2]
)
m_agg_S <- as.micro_array(
  agg_S, c(60, 90, 210, 390), 5,
  name = CLL[, 1], gene_ID = CLL[, 2]
)

Focus on EGR1: run the code to get the graph of the expression values (pasted together for all the subjects) for all the probesets tagged as EGR1.

# Natural-log values of every row of agg_S whose gene ID (column 2 of CLL) is
# "EGR1", drawn as one solid line per row across the columns of agg_S.
matplot(
  t(log(agg_S[CLL[, 2] %in% "EGR1", ])),
  type = "l",
  lty = 1
)

Select genes according to their profiles.

# Four selections comparing the aggressive unstimulated and stimulated arrays,
# one per time index (1 to 4) in the "condition&time" specification.
# The first two calls pass -1 as third argument with alpha = 0.1; the last two
# pass 50 with alpha = 0.005 (presumably -1 means "no cap on the number of
# genes" and 50 an upper bound -- confirm against ?geneSelection).
selection1 <- geneSelection(
  list(m_agg_US, m_agg_S),
  list("condition&time", c(1, 2), c(1, 1)),
  -1,
  alpha = 0.1
)
selection2 <- geneSelection(
  list(m_agg_US, m_agg_S),
  list("condition&time", c(1, 2), c(2, 2)),
  -1,
  alpha = 0.1
)
selection3 <- geneSelection(
  list(m_agg_US, m_agg_S),
  list("condition&time", c(1, 2), c(3, 3)),
  50,
  alpha = 0.005
)
selection4 <- geneSelection(
  list(m_agg_US, m_agg_S),
  list("condition&time", c(1, 2), c(4, 4)),
  50,
  alpha = 0.005
)

Merge the four selections into a single one.

# Combine the four per-time-index selections into one object, then print its
# summary.
selection <- Patterns::unionMicro(
  list(selection1, selection2, selection3, selection4)
)
summary(selection)

Number of genes in the merged selection.

# Number of entries in the gene_ID slot of the merged selection.
length(selection@gene_ID)

Translate the probesets’ names for the selection.

# library() stops immediately when biomaRt is missing, whereas require() would
# only return FALSE and let the script fail later at useMart().
library(biomaRt)

# Example probeset IDs; not used by the query below.
affyids <- c("202763_at", "209310_s_at", "207500_at")
ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")

# Recent Ensembl releases expose the Entrez identifier as "entrezgene_id"
# instead of "entrezgene". Ask the connected mart which name it knows so that
# getBM() does not stop with an "Invalid attribute(s)" error.
mart_attributes <- listAttributes(ensembl)$name
entrez_attribute <- if ("entrezgene_id" %in% mart_attributes) {
  "entrezgene_id"
} else {
  "entrezgene"
}

# Annotation table for the probesets of the selection (names matched against
# column 1 of CLL), keyed by the affy_hg_u133_plus_2 filter.
infos <- getBM(
  attributes = c(
    "affy_hg_u133_plus_2", "ensembl_gene_id", entrez_attribute,
    "hgnc_symbol", "chromosome_name", "start_position", "end_position",
    "band"
  ),
  filters = "affy_hg_u133_plus_2",
  values = CLL[CLL[, 1] %in% selection@name, 1],
  mart = ensembl,
  uniqueRows = TRUE,
  checkFilters = TRUE
)

# For each probeset name, store the distinct HGNC symbols found in infos.
# A probeset may map to zero, one or several symbols, hence a list.
selection@gene_ID <- lapply(selection@name, function(x) {
  unique(infos[infos$affy_hg_u133_plus_2 == x, "hgnc_symbol"])
})

Network inference

Add grouping information according to the pre-merge selection membership to perform network inference.

# Start from an all-NA group vector, one entry per probeset name.
selection@group <- rep_len(NA, length(selection@name))
names(selection@group) <- selection@name

# Label each probeset with the pre-merge selection it belongs to. The
# assignments run from 4 down to 1, so a probeset present in several
# selections keeps the lowest selection number.
selection@group[selection@name %in% selection4@name] <- 4
selection@group[selection@name %in% selection3@name] <- 3
selection@group[selection@name %in% selection2@name] <- 2
selection@group[selection@name %in% selection1@name] <- 1

plot(selection)

Check the length of the group slot of the selection object.

# Number of entries in the group slot of the selection object.
length(selection@group)

Perform a lasso-based inference of the network, then print the structure of the network object.

# Infer the network from the grouped selection with the "LASSO2" fitting
# function; the initial F matrix and its shape both come from the Cascade
# helpers called with (4, 4) (presumably 4 time points and 4 groups -- confirm
# against ?CascadeFinit).
network <- inference(
  selection,
  fitfun = "LASSO2",
  Finit = CascadeFinit(4, 4),
  Fshape = CascadeFshape(4, 4)
)

# Display the internal structure of the result.
str(network)

Plot the inferred F matrix.

# Draw the F slot of the inferred network.
plotF(network@F, choice = "F")

Save results.

# Write the selection and the annotation table to separate .RData files in the
# current working directory.
save(selection, file = "selection.RData")
save(infos, file = "infos.RData")

Focus on transcription factors.

Retrieve human transcription factors from HumanTFDB, extracted from AnimalTFDB 3.0: a comprehensive resource for annotation and prediction of animal transcription factors. Hui Hu, Ya-Ru Miao, Long-Hao Jia, Qing-Yang Yu, Qiong Zhang and An-Yuan Guo. Nucl. Acids Res. (2018).

# Download the tab-delimited Homo sapiens transcription factor table.
doc <- read.delim(
  "http://bioinfo.life.hust.edu.cn/static/AnimalTFDB3/download/Homo_sapiens_TF",
  encoding = "UTF-8",
  header = TRUE
)

# Keep the Symbol column as a sorted character vector; na.last = TRUE keeps
# any missing value at the end rather than dropping it.
TF <- sort(as.character(doc[["Symbol"]]), na.last = TRUE)

The TF object holds the gene symbols of the human transcription factors. We retrieve those that are in the selection object.

# Annotation rows that belong to probesets of the selection.
infos_selection <- infos[infos$affy_hg_u133_plus_2 %in% selection@name, ]

# Probesets whose HGNC symbol is a known transcription factor.
tf_probesets <- unique(
  infos_selection[
    infos_selection[, "hgnc_symbol"] %in% TF,
    "affy_hg_u133_plus_2"
  ]
)

# tfs is used below to index rows of selection@microarray, so it must hold
# positions within the selection, not row numbers of the annotation table
# (which has its own ordering and may list a probeset several times).
# NOTE(review): assumes the rows of selection@microarray follow the order of
# selection@name -- confirm.
tfs <- which(selection@name %in% tf_probesets)

Some plots of the TF found in the selection.

# Profiles of the microarray rows indexed by tfs, one solid line per row.
matplot(
  t(selection@microarray[tfs, ]),
  type = "l",
  lty = 1
)

# Cluster those rows into 10 groups and plot the cluster centres the same way.
# kmeans() starts from randomly chosen centres, so the clusters can differ
# between runs unless a seed is set beforehand.
kk <- kmeans(selection@microarray[tfs, ], centers = 10)
matplot(
  t(kk$centers),
  type = "l",
  lty = 1
)