Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
updated the clustering component
fixed a few bugs in the clustering/plotting component
  • Loading branch information
hyb13001 committed Sep 19, 2020
1 parent 2674f98 commit e72e1d0
Show file tree
Hide file tree
Showing 13 changed files with 66 additions and 32 deletions.
2 changes: 1 addition & 1 deletion .Rproj.user/43F9182D/pcs/source-pane.pper
@@ -1,3 +1,3 @@
{
"activeTab": 3
"activeTab": 0
}
12 changes: 6 additions & 6 deletions .Rproj.user/43F9182D/pcs/windowlayoutstate.pper
@@ -1,14 +1,14 @@
{
"left": {
"splitterpos": 272,
"splitterpos": 254,
"topwindowstate": "NORMAL",
"panelheight": 664,
"windowheight": 702
"panelheight": 622,
"windowheight": 660
},
"right": {
"splitterpos": 426,
"splitterpos": 399,
"topwindowstate": "NORMAL",
"panelheight": 664,
"windowheight": 702
"panelheight": 622,
"windowheight": 660
}
}
2 changes: 1 addition & 1 deletion .Rproj.user/43F9182D/pcs/workbench-pane.pper
@@ -1,5 +1,5 @@
{
"TabSet1": 3,
"TabSet2": 1,
"TabSet2": 3,
"TabZoom": {}
}
6 changes: 3 additions & 3 deletions .Rproj.user/43F9182D/persistent-state
@@ -1,8 +1,8 @@
build-last-errors="[]"
build-last-errors-base-dir="~/Dropbox/Packages/edgefinder/"
build-last-outputs="[{\"type\":0,\"output\":\"==> devtools::document(roclets = c('rd', 'collate', 'namespace', 'vignette'))\\n\\n\"},{\"type\":2,\"output\":\"Updating edgefinder documentation\\n\"},{\"type\":2,\"output\":\"Loading edgefinder\\n\"},{\"type\":1,\"output\":\"Writing NAMESPACE\\n\"},{\"type\":1,\"output\":\"Writing NAMESPACE\\n\"},{\"type\":2,\"output\":\"Updating vignettes\\n\"},{\"type\":1,\"output\":\"Documentation completed\\n\\n\"},{\"type\":0,\"output\":\"==> R CMD INSTALL --no-multiarch --with-keep.source edgefinder\\n\\n\"},{\"type\":1,\"output\":\"* installing to library ‘/Library/Frameworks/R.framework/Versions/4.0/Resources/library’\\n\"},{\"type\":1,\"output\":\"* installing *source* package ‘edgefinder’ ...\\n\"},{\"type\":1,\"output\":\"** using staged installation\\n\"},{\"type\":1,\"output\":\"** R\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** data\\n\"},{\"type\":1,\"output\":\"*** moving datasets to lazyload DB\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** byte-compile and prepare package for lazy loading\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** help\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"*** installing help indices\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** building package indices\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** installing vignettes\\n\"},{\"type\":1,\"output\":\"** testing if installed package can be loaded from temporary location\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** testing if installed package can be loaded from final location\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** testing if installed package keeps a record of temporary installation path\\n\"},{\"type\":1,\"output\":\"* DONE (edgefinder)\\n\"},{\"type\":1,\"output\":\"\"}]"
build-last-outputs="[{\"type\":0,\"output\":\"==> devtools::document(roclets = c('rd', 'collate', 'namespace', 'vignette'))\\n\\n\"},{\"type\":2,\"output\":\"Updating edgefinder documentation\\n\"},{\"type\":2,\"output\":\"Loading edgefinder\\n\"},{\"type\":1,\"output\":\"Writing NAMESPACE\\n\"},{\"type\":1,\"output\":\"Writing NAMESPACE\\n\"},{\"type\":2,\"output\":\"Updating vignettes\\n\"},{\"type\":1,\"output\":\"Documentation completed\\n\\n\"},{\"type\":0,\"output\":\"==> R CMD INSTALL --no-multiarch --with-keep.source edgefinder\\n\\n\"},{\"type\":1,\"output\":\"* installing to library ‘/Library/Frameworks/R.framework/Versions/4.0/Resources/library’\\n\"},{\"type\":1,\"output\":\"* installing *source* package ‘edgefinder’ ...\\n\"},{\"type\":1,\"output\":\"** using staged installation\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** R\\n\"},{\"type\":1,\"output\":\"** data\\n\"},{\"type\":1,\"output\":\"*** moving datasets to lazyload DB\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** byte-compile and prepare package for lazy loading\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** help\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"*** installing help indices\\n\"},{\"type\":1,\"output\":\"** building package indices\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** installing vignettes\\n\"},{\"type\":1,\"output\":\"** testing if installed package can be loaded from temporary location\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** testing if installed package can be loaded from final location\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** testing if installed package keeps a record of temporary installation path\\n\"},{\"type\":1,\"output\":\"* DONE (edgefinder)\\n\"},{\"type\":1,\"output\":\"\"}]"
compile_pdf_state="{\"tab_visible\":false,\"running\":false,\"target_file\":\"\",\"output\":\"\",\"errors\":[]}"
files.monitored-path=""
find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":false,\"ignoreCase\":false,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOn\":[],\"matchOff\":[],\"replaceMatchOn\":[],\"replaceMatchOff\":[]},\"running\":false,\"replace\":false,\"preview\":false,\"gitFlag\":false,\"replacePattern\":\"\"}"
imageDirtyState="1"
saveActionState="-1"
imageDirtyState="0"
saveActionState="0"
2 changes: 1 addition & 1 deletion .Rproj.user/43F9182D/sources/prop/08EF61B6
@@ -1,5 +1,5 @@
{
"cursorPosition": "328,64",
"scrollLine": "326",
"scrollLine": "325",
"last_setup_crc32": ""
}
2 changes: 1 addition & 1 deletion .Rproj.user/43F9182D/sources/prop/73839B89
@@ -1,4 +1,4 @@
{
"cursorPosition": "3,14",
"cursorPosition": "5,25",
"scrollLine": "0"
}
4 changes: 2 additions & 2 deletions .Rproj.user/43F9182D/sources/prop/8F178DC0
@@ -1,4 +1,4 @@
{
"cursorPosition": "639,0",
"scrollLine": "625"
"cursorPosition": "617,17",
"scrollLine": "609"
}
3 changes: 3 additions & 0 deletions .Rproj.user/shared/notebooks/paths
@@ -1,2 +1,5 @@
/Users/haim/Dropbox/Packages/edgefinder/DESCRIPTION="2998DAE0"
/Users/haim/Dropbox/Packages/edgefinder/R/edgefinder.R="83C6AEC8"
/Users/haim/Dropbox/Packages/edgefinder/vignettes/edgefinder.Rmd="248C74FE"
/Users/haim/Dropbox/Projects/DIADEM/newres.R="FD8D0F70"
/Users/haim/Dropbox/Projects/QTL/betaMix/mtb01.R="05D6C52D"
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,7 +1,7 @@
Package: edgefinder
Type: Package
Title: Detect Edges in Sparse Co-expression Graphs
Version: 0.1.6
Version: 0.1.8
Author: Haim Bar and Seojin Bang
Maintainer: Haim Bar <haim.bar@uconn.edu>
Description: Finding edges in co-expression graphs, based on "A Mixture Model to Detect Edges in Sparse Co-expression Graphs", Haim Bar and Seojin Bang. See more details in the vignettes.
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Expand Up @@ -14,4 +14,5 @@ export(summarizeClusters)
import(graphics)
import(stats)
importFrom(Matrix,Matrix)
importFrom(grDevices,col2rgb)
importFrom(grDevices,rgb)
45 changes: 32 additions & 13 deletions R/edgefinder.R
Expand Up @@ -39,6 +39,7 @@
#' @import stats
#' @importFrom Matrix Matrix
#' @importFrom grDevices rgb
#' @importFrom grDevices col2rgb
#' @examples
#' \donttest{
#' data(WT)
Expand Down Expand Up @@ -257,7 +258,7 @@ logoddsValues <- function(y,theta,tau,mu1,s1,mu2,s2,p1,p2,vals=1:30) {
1 - pnorm(ret[i,2], theta, tau)
# "Power":
ret[i,5] <- (p1*(1-plnorm(ret[i,2], mu1, s1)) +
p2*(1-plnorm(-ret[i,3], mu2, s2)))/(p1+p2)
p2*(1-plnorm(-ret[i,3], mu2, s2)))/(p1+p2)
# FDR:
ret[i,6] <- p0*ret[i,4]/(p0*ret[i,4]+ret[i,5]*(p1+p2))
}
Expand Down Expand Up @@ -310,7 +311,7 @@ GoodnessOfFit <- function(fit.em, mixturemodel="L2N") {

#' Find clusters, and return node characteristics.
#'
#' Take an adjacency Matrix as input and find clusters. For each node, find the degree and clustering coefficient (CC). Then, calculate a centrality measure (type\*CC+1)\*deg. For type=0, it's just the degree. Note that setting type=1 we assign a higher value to nodes that not only have many neighbors, but the neighbors are highly interconnected. For example, suppose we have two components with k nodes, one has a star shape, and the other is a complete graph. With type=0 both graphs will get the same value, but with type=1 the complete graph will be picked by the algorithm first.
#' Take an adjacency Matrix as input and find clusters. For each node, find the degree and clustering coefficient (CC). Then, calculate a centrality measure (type\*CC+1)\*deg. For type=0, it's just the degree. Note that by setting type=1 we assign a higher value to nodes that not only have many neighbors, but whose neighbors are also highly interconnected. For example, suppose we have two components with k nodes, one has a star shape, and the other is a complete graph. With type=0 both graphs will get the same value, but with type=1 the complete graph will be picked by the algorithm first. Setting type to a negative value gives CC\*deg as the centrality measure.
#' @param A An adjacency Matrix(0/1).
#' @param minCtr The minimum centrality value to be considered for a cluster center (default=5).
#' @param type Determines how the centrality measure is computed.
Expand All @@ -319,7 +320,7 @@ GoodnessOfFit <- function(fit.em, mixturemodel="L2N") {
#' \item{labels} {Node label (e.g. gene names).}
#' \item{degree} {Node degree.}
#' \item{cc} {Node clustering coefficient.}
#' \item{ctr} {Node centrality measure: (type\*CC+1)\*deg.}
#' \item{ctr} {Node centrality measure: (type\*CC+1)\*deg, or CC\*deg if type is negative.}
#' \item{clustNo} {Cluster number.}
#' \item {iscenter} {1 if the node was chosen as the cluster's center, 0 otherwise.}
#' \item {intEdges} {Number of edges from the node to nodes in the same cluster.}
Expand All @@ -344,6 +345,8 @@ graphComponents <- function(A, minCtr=5, type=1) {
deg <- Matrix::rowSums(A)
CC <- clusteringCoef(A)
ctrs <- (type*CC+1)*deg
if (type < 0)
ctrs <- CC*deg
clustersInfo <- data.frame(labels=labels, degree=deg, cc=CC, ctr=ctrs,
clustNo=rep(0,Vn), iscenter=rep(0,Vn),
intEdges=rep(0,Vn), extEdges=rep(0,Vn),
Expand All @@ -362,7 +365,11 @@ graphComponents <- function(A, minCtr=5, type=1) {
clustersInfo$iscenter[ctrnode] <- 1
clustersInfo$clustNo[union(ctrnode,nbrs)] <- clustNo
clustersInfo$intEdges[nbrs] <- Matrix::rowSums(A[nbrs,nbrs])
clustersInfo$extEdges[nbrs] <- Matrix::rowSums(A[nbrs,-nbrs])
if (length(nbrs) < ncol(A)) {
clustersInfo$extEdges[nbrs] <- Matrix::rowSums(A[nbrs,-nbrs])
} else {
clustersInfo$extEdges[nbrs] <- 0
}
for (i in 1:length(nbrs)) {
clustersInfo$distCenter[nbrs[i]] <- mean(xor(A[ctrnode,], A[nbrs[i],]))
}
Expand Down Expand Up @@ -400,11 +407,13 @@ summarizeClusters <- function(clustersInfo) {
cat("Num of unclustered nodes:", length(which(clustersInfo$clustNo == 0)),"\n")
percentInCluster <- clustersInfo$intEdges/clustersInfo$degree
percentInCluster[which(clustersInfo$degree == 0)] <- 0
if (max(clustersInfo$clustNo) == 0)
return(NULL)
tab <- matrix(0,nrow=max(clustersInfo$clustNo),ncol=12)
for (cnum in 1:max(clustersInfo$clustNo)) {
tmpclusterInfo <- clustersInfo[which(clustersInfo$clustNo == cnum),]
tab[cnum,] <- c(cnum,nrow(tmpclusterInfo), fivenum(tmpclusterInfo$degree),
fivenum(percentInCluster[which(clustersInfo$clustNo == cnum)]))
fivenum(percentInCluster[which(clustersInfo$clustNo == cnum)]))
}
colnames(tab) <- c("Cluster","Nodes","degreeMin","degreeQ25","degreeMedian",
"degreeQ75","degreeMax","pctInClstMin","pctInClstQ25",
Expand Down Expand Up @@ -434,16 +443,21 @@ collapsedGraph <- function(A, clustersInfo) {
collA <- Matrix::Matrix(0, ncol=collDim, nrow=collDim)
inCluster <- which(clustersInfo$clustNo > 0)
notInCluster <- which(clustersInfo$clustNo == 0)
collA[1:length(notInCluster), 1:length(notInCluster)] <- A[notInCluster, notInCluster]>0
if (length(notInCluster) > 0) {
collA[1:length(notInCluster), 1:length(notInCluster)] <- A[notInCluster, notInCluster]>0
}
if (length(rownames(A)) != nrow(A)) {
rownames(A) <- 1:nrow(A)
}
rownames(collA) <- c(rownames(A)[notInCluster],
paste0("CLS",1:max(clustersInfo$clustNo)))
for (i in 1:max(clustersInfo$clustNo)) {
Ci <- which(clustersInfo$clustNo == i)
collA[i+length(notInCluster),1:length(notInCluster)] <-
Matrix::rowSums(A[notInCluster,which(clustersInfo$clustNo==i)])
if (length(notInCluster) > 0) {
Atmp <- matrix(A[notInCluster,which(clustersInfo$clustNo==i)],
nrow=length(notInCluster), ncol=length(which(clustersInfo$clustNo==i)))
collA[i+length(notInCluster),1:length(notInCluster)] <- Matrix::rowSums(Atmp)
}
if (i < max(clustersInfo$clustNo)) {
for (j in (i+1):max(clustersInfo$clustNo)) {
Cj <- which(clustersInfo$clustNo == j)
Expand All @@ -455,6 +469,7 @@ collapsedGraph <- function(A, clustersInfo) {
}



#' Calculate the clustering coefficient of each node.
#'
#' @param A an adjacency Matrix (0/1).
Expand Down Expand Up @@ -587,6 +602,8 @@ plotBitmapCC <- function(AdjMat, clusterInfo=NULL, orderByCluster=FALSE, showMin
#' @param clustNo The chosen cluster.
#' @param clusterInfo Obtained from graphComponents.
#' @param labels If set to TRUE, show node names (default=FALSE).
#' @param nodecol The color(s) of the nodes. Can be a single value or a vector of length equal to the number of rows in AdjMat. If a shorter vector is given, only its first element is used, for all nodes.
#' @param labelsize Text size of node labels.
#' @export
#' @examples
#' \donttest{
Expand All @@ -595,9 +612,11 @@ plotBitmapCC <- function(AdjMat, clusterInfo=NULL, orderByCluster=FALSE, showMin
#' WTComp <- graphComponents(WTres$AdjMat)
#' plotCluster(WTres$AdjMat, 5, WTComp)
#' }
plotCluster <- function(AdjMat, clustNo, clusterInfo=NULL, labels=FALSE) {
plotCluster <- function(AdjMat, clustNo, clusterInfo=NULL, labels=FALSE, nodecol="blue",labelsize=1) {
if(is.null(clusterInfo))
clusterInfo <- graphComponents(AdjMat)
if(length(nodecol) < nrow(AdjMat))
nodecol <- rep(nodecol[1],length=nrow(AdjMat))
ids <- which(clusterInfo$clustNo == clustNo)
if (length(ids) > 0) {
tmpA <- AdjMat[ids,ids]
Expand All @@ -612,8 +631,9 @@ plotCluster <- function(AdjMat, clustNo, clusterInfo=NULL, labels=FALSE) {
sizes <- pmax(0.3,tmpclusterInfo$degree/max(tmpclusterInfo$degree))
opacity <- 0.25+tmpclusterInfo$intEdges/tmpclusterInfo$degree
opacity <- opacity/max(opacity)
nodecol <- rgb(t(col2rgb(nodecol)/255),alpha=opacity)[ids]
plot(rads*cos(thetas), rads*sin(thetas),cex=sizes*3, pch=19,axes=F,
xlab="",ylab="",col=rgb(red = 0, green = 0, blue = 1, alpha = opacity))
xlab="",ylab="",col=nodecol)
for (i in 1:ncol(tmpA)) {
nbrs <- setdiff(which(tmpA[i,] == 1), 1:i)
if(length(nbrs) > 0) {
Expand All @@ -624,10 +644,9 @@ plotCluster <- function(AdjMat, clustNo, clusterInfo=NULL, labels=FALSE) {
}
}
}
points(rads*cos(thetas), rads*sin(thetas),cex=sizes*3, pch=19,
col=rgb(red = 0, green = 0, blue = 1, alpha = opacity))
points(rads*cos(thetas), rads*sin(thetas),cex=sizes*3, pch=19, col=nodecol)
if (labels)
text(rads*cos(thetas), rads*sin(thetas), tmpclusterInfo$labels, pos=3)
text(rads*cos(thetas), rads*sin(thetas), tmpclusterInfo$labels, pos=3, cex=labelsize)
ctr <- which(tmpclusterInfo$iscenter==1)
points(rads[ctr]*cos(thetas[ctr]), rads[ctr]*sin(thetas[ctr]),pch=21,
cex=sizes[ctr]*3, col="black",lwd=2)
Expand Down
4 changes: 2 additions & 2 deletions man/graphComponents.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 12 additions & 1 deletion man/plotCluster.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit e72e1d0

Please sign in to comment.