-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path04 FunctionalClusters.R
More file actions
75 lines (65 loc) · 2.81 KB
/
04 FunctionalClusters.R
File metadata and controls
75 lines (65 loc) · 2.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
############################################################
############################################################
# Functional clusters
#
# Author: Farzan Ghanegolmohammadi
#
############################################################
############################################################
# Description:
# Hierarchical cluster analysis (HCA) of the obtained functional profile
############################################################
setwd("PATH")
# Prerequisite
if("BiocManager" %in% rownames(installed.packages())){
library(BiocManager)
} else {install.packages("BiocManager")}
if("qvalue" %in% rownames(installed.packages())){
library(qvalue)
} else {BiocManager::install("qvalue"); library(qvalue)}
############################################################
## Loading slimmed GO data
load("Data/GO/GOData.rdata")
## Significant level
FDR <- .05
############################################################
# Hierarchical cluster analysis based on functional profile
HCA <- NULL
HCA$hclust <- hclust(dist(GO$ReducedGOmat, method = "binary"), method = "complete")
## Cutting the tree at h = 0.0999
temp <- sort(cutree(HCA$hclust, h = 0.999))
test <- tapply(temp, factor(temp), names)
names(test) <- sprintf("Group%03i", as.numeric(names(test)))
HCA$CutTreeAll <- list(Nodes = temp, Cut = test)
rm(test,temp)
## Funding groups with >2 members
HCA$CutTree_3 <- HCA$CutTreeAll$Cut[sapply(HCA$CutTreeAll$Cut, length) > 2]
save(HCA, file = "Data/GO/HCA.rdata")
######################################
# GO enrichment of the obtained functional groups
#### ORF names from functional grouping given the threshold (>2)
ORF_Names <- unlist(HCA$CutTree_3)
#### GO Boolean matrix with ORF names from functional grouping given the threshold (>2)
GO_Enrich <- GO$ReducedGOmat[ORF_Names,]
Fisher <- NULL
Fisher$PVal <- matrix(NA, nrow = length(HCA$CutTree_3), ncol = ncol(GO_Enrich),
dimnames = list(names(HCA$CutTree_3), colnames(GO_Enrich)))
pb <- txtProgressBar(min = 0, max = length(HCA$CutTree_3), style = 3)
for (j in names(HCA$CutTree_3)) {
GroupedORFs <- vector(mode = "logical", length = length(ORF_Names))
names(GroupedORFs) <- ORF_Names
GroupedORFs[HCA$CutTree_3[[j]]] <- TRUE
if(sum(GroupedORFs) != length(HCA$CutTree_3[[j]])) stop()
for (i in colnames(GO_Enrich)) {
temp <- data.frame(y = factor(as.vector(GO_Enrich[,i]), levels = c("TRUE","FALSE")),
x = factor(GroupedORFs, levels = c("TRUE","FALSE")))
# Fisher's exact test
Fisher$PVal[j,i] <- fisher.test(table(temp), alternative = "greater")$p.value
rm(temp)
}
rm(GroupedORFs)
setTxtProgressBar(pb, which(names(HCA$CutTree_3) == i))
}
rm(i,j,pb)
Fisher$qVal <- qvalue::qvalue(Fisher$PVal)$qvalues
save(Fisher, file = "Data/GO/FisherTest.rdata")