diff --git a/.Rproj.user/43F9182D/pcs/source-pane.pper b/.Rproj.user/43F9182D/pcs/source-pane.pper index 28a3c2e..902cc6f 100644 --- a/.Rproj.user/43F9182D/pcs/source-pane.pper +++ b/.Rproj.user/43F9182D/pcs/source-pane.pper @@ -1,3 +1,3 @@ { - "activeTab": 3 + "activeTab": 0 } \ No newline at end of file diff --git a/.Rproj.user/43F9182D/pcs/windowlayoutstate.pper b/.Rproj.user/43F9182D/pcs/windowlayoutstate.pper index f4fc4e6..2fa5539 100644 --- a/.Rproj.user/43F9182D/pcs/windowlayoutstate.pper +++ b/.Rproj.user/43F9182D/pcs/windowlayoutstate.pper @@ -1,14 +1,14 @@ { "left": { - "splitterpos": 272, + "splitterpos": 254, "topwindowstate": "NORMAL", - "panelheight": 664, - "windowheight": 702 + "panelheight": 622, + "windowheight": 660 }, "right": { - "splitterpos": 426, + "splitterpos": 399, "topwindowstate": "NORMAL", - "panelheight": 664, - "windowheight": 702 + "panelheight": 622, + "windowheight": 660 } } \ No newline at end of file diff --git a/.Rproj.user/43F9182D/pcs/workbench-pane.pper b/.Rproj.user/43F9182D/pcs/workbench-pane.pper index 1893013..f398270 100644 --- a/.Rproj.user/43F9182D/pcs/workbench-pane.pper +++ b/.Rproj.user/43F9182D/pcs/workbench-pane.pper @@ -1,5 +1,5 @@ { "TabSet1": 3, - "TabSet2": 1, + "TabSet2": 3, "TabZoom": {} } \ No newline at end of file diff --git a/.Rproj.user/43F9182D/persistent-state b/.Rproj.user/43F9182D/persistent-state index e9fde1f..0307923 100644 --- a/.Rproj.user/43F9182D/persistent-state +++ b/.Rproj.user/43F9182D/persistent-state @@ -1,8 +1,8 @@ build-last-errors="[]" build-last-errors-base-dir="~/Dropbox/Packages/edgefinder/" -build-last-outputs="[{\"type\":0,\"output\":\"==> devtools::document(roclets = c('rd', 'collate', 'namespace', 'vignette'))\\n\\n\"},{\"type\":2,\"output\":\"Updating edgefinder documentation\\n\"},{\"type\":2,\"output\":\"Loading edgefinder\\n\"},{\"type\":1,\"output\":\"Writing NAMESPACE\\n\"},{\"type\":1,\"output\":\"Writing NAMESPACE\\n\"},{\"type\":2,\"output\":\"Updating vignettes\\n\"},{\"type\":1,\"output\":\"Documentation completed\\n\\n\"},{\"type\":0,\"output\":\"==> R CMD INSTALL --no-multiarch --with-keep.source edgefinder\\n\\n\"},{\"type\":1,\"output\":\"* installing to library ‘/Library/Frameworks/R.framework/Versions/4.0/Resources/library’\\n\"},{\"type\":1,\"output\":\"* installing *source* package ‘edgefinder’ ...\\n\"},{\"type\":1,\"output\":\"** using staged installation\\n\"},{\"type\":1,\"output\":\"** R\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** data\\n\"},{\"type\":1,\"output\":\"*** moving datasets to lazyload DB\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** byte-compile and prepare package for lazy loading\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** help\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"*** installing help indices\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** building package indices\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** installing vignettes\\n\"},{\"type\":1,\"output\":\"** testing if installed package can be loaded from temporary location\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** testing if installed package can be loaded from final location\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** testing if installed package keeps a record of temporary installation path\\n\"},{\"type\":1,\"output\":\"* DONE (edgefinder)\\n\"},{\"type\":1,\"output\":\"\"}]" +build-last-outputs="[{\"type\":0,\"output\":\"==> devtools::document(roclets = c('rd', 'collate', 'namespace', 'vignette'))\\n\\n\"},{\"type\":2,\"output\":\"Updating edgefinder documentation\\n\"},{\"type\":2,\"output\":\"Loading edgefinder\\n\"},{\"type\":1,\"output\":\"Writing NAMESPACE\\n\"},{\"type\":1,\"output\":\"Writing NAMESPACE\\n\"},{\"type\":2,\"output\":\"Updating vignettes\\n\"},{\"type\":1,\"output\":\"Documentation completed\\n\\n\"},{\"type\":0,\"output\":\"==> R CMD INSTALL --no-multiarch --with-keep.source edgefinder\\n\\n\"},{\"type\":1,\"output\":\"* installing to library ‘/Library/Frameworks/R.framework/Versions/4.0/Resources/library’\\n\"},{\"type\":1,\"output\":\"* installing *source* package ‘edgefinder’ ...\\n\"},{\"type\":1,\"output\":\"** using staged installation\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** R\\n\"},{\"type\":1,\"output\":\"** data\\n\"},{\"type\":1,\"output\":\"*** moving datasets to lazyload DB\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** byte-compile and prepare package for lazy loading\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** help\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"*** installing help indices\\n\"},{\"type\":1,\"output\":\"** building package indices\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** installing vignettes\\n\"},{\"type\":1,\"output\":\"** testing if installed package can be loaded from temporary location\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** testing if installed package can be loaded from final location\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** testing if installed package keeps a record of temporary installation path\\n\"},{\"type\":1,\"output\":\"* DONE (edgefinder)\\n\"},{\"type\":1,\"output\":\"\"}]" compile_pdf_state="{\"tab_visible\":false,\"running\":false,\"target_file\":\"\",\"output\":\"\",\"errors\":[]}" files.monitored-path="" find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":false,\"ignoreCase\":false,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOn\":[],\"matchOff\":[],\"replaceMatchOn\":[],\"replaceMatchOff\":[]},\"running\":false,\"replace\":false,\"preview\":false,\"gitFlag\":false,\"replacePattern\":\"\"}" -imageDirtyState="1" -saveActionState="-1" +imageDirtyState="0" +saveActionState="0" diff --git a/.Rproj.user/43F9182D/sources/prop/08EF61B6 b/.Rproj.user/43F9182D/sources/prop/08EF61B6 index 97a9276..04dfba9 100644 --- a/.Rproj.user/43F9182D/sources/prop/08EF61B6 +++ b/.Rproj.user/43F9182D/sources/prop/08EF61B6 @@ -1,5 +1,5 @@ { "cursorPosition": "328,64", - "scrollLine": "326", + "scrollLine": "325", "last_setup_crc32": "" } \ No newline at end of file diff --git a/.Rproj.user/43F9182D/sources/prop/73839B89 b/.Rproj.user/43F9182D/sources/prop/73839B89 index a1be1af..4d6a0a0 100644 --- a/.Rproj.user/43F9182D/sources/prop/73839B89 +++ b/.Rproj.user/43F9182D/sources/prop/73839B89 @@ -1,4 +1,4 @@ { - "cursorPosition": "3,14", + "cursorPosition": "5,25", "scrollLine": "0" } \ No newline at end of file diff --git a/.Rproj.user/43F9182D/sources/prop/8F178DC0 b/.Rproj.user/43F9182D/sources/prop/8F178DC0 index d085d65..217797a 100644 --- a/.Rproj.user/43F9182D/sources/prop/8F178DC0 +++ b/.Rproj.user/43F9182D/sources/prop/8F178DC0 @@ -1,4 +1,4 @@ { - "cursorPosition": "639,0", - "scrollLine": "625" + "cursorPosition": "617,17", + "scrollLine": "609" } \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths index 51dc0e0..0710f16 100644 --- a/.Rproj.user/shared/notebooks/paths +++ b/.Rproj.user/shared/notebooks/paths @@ -1,2 +1,5 @@ +/Users/haim/Dropbox/Packages/edgefinder/DESCRIPTION="2998DAE0" +/Users/haim/Dropbox/Packages/edgefinder/R/edgefinder.R="83C6AEC8" /Users/haim/Dropbox/Packages/edgefinder/vignettes/edgefinder.Rmd="248C74FE" /Users/haim/Dropbox/Projects/DIADEM/newres.R="FD8D0F70" +/Users/haim/Dropbox/Projects/QTL/betaMix/mtb01.R="05D6C52D" diff --git a/DESCRIPTION b/DESCRIPTION index 4d07f67..58c8db0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: edgefinder Type: Package Title: Detect Edges in Sparse Co-expression Graphs -Version: 0.1.6 +Version: 0.1.8 Author: Haim Bar and Seojin Bang Maintainer: Haim Bar Description: Finding edges in co-expression graphs, based on "A Mixture Model to Detect Edges in Sparse Co-expression Graphs", Haim Bar and Seojin Bang. See more details in the vignettes. diff --git a/NAMESPACE b/NAMESPACE index 37654c6..c66d5a0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,4 +14,5 @@ export(summarizeClusters) import(graphics) import(stats) importFrom(Matrix,Matrix) +importFrom(grDevices,col2rgb) importFrom(grDevices,rgb) diff --git a/R/edgefinder.R b/R/edgefinder.R index c64b0b9..5382dba 100644 --- a/R/edgefinder.R +++ b/R/edgefinder.R @@ -39,6 +39,7 @@ #' @import stats #' @importFrom Matrix Matrix #' @importFrom grDevices rgb +#' @importFrom grDevices col2rgb #' @examples #' \donttest{ #' data(WT) @@ -257,7 +258,7 @@ logoddsValues <- function(y,theta,tau,mu1,s1,mu2,s2,p1,p2,vals=1:30) { 1 - pnorm(ret[i,2], theta, tau) # "Power": ret[i,5] <- (p1*(1-plnorm(ret[i,2], mu1, s1)) + - p2*(1-plnorm(-ret[i,3], mu2, s2)))/(p1+p2) + p2*(1-plnorm(-ret[i,3], mu2, s2)))/(p1+p2) # FDR: ret[i,6] <- p0*ret[i,4]/(p0*ret[i,4]+ret[i,5]*(p1+p2)) } @@ -310,7 +311,7 @@ GoodnessOfFit <- function(fit.em, mixturemodel="L2N") { #' Find clusters, and return node characteristics. #' -#' Take an adjacency Matrix as input and find clusters. For each node, find the degree and clustering coefficient (CC). Then, calculate a centrality measure (type\*CC+1)\*deg. For type=0, it's just the degree. Note that setting type=1 we assign a higher value to nodes that not only have many neighbors, but the neighbors are highly interconnected. For example, suppose we have two components with k nodes, one has a star shape, and the other is a complete graph. With type=0 both graphs will get the same value, but with type=1 the complete graph will be picked by the algorithm first. +#' Take an adjacency Matrix as input and find clusters. For each node, find the degree and clustering coefficient (CC). Then, calculate a centrality measure (type\*CC+1)\*deg. For type=0, it's just the degree. Note that setting type=1 we assign a higher value to nodes that not only have many neighbors, but the neighbors are highly interconnected. For example, suppose we have two components with k nodes, one has a star shape, and the other is a complete graph. With type=0 both graphs will get the same value, but with type=1 the complete graph will be picked by the algorithm first. Setting type to a negative value gives CC\*deg as the centrality measure. #' @param A An adjacency Matrix(0/1). #' @param minCtr The minimum centrality value to be considered for a cluster center (default=5). #' @param type Determines how the centrality measure is computed. @@ -319,7 +320,7 @@ GoodnessOfFit <- function(fit.em, mixturemodel="L2N") { #' \item{labels} {Node label (e.g. gene names).} #' \item{degree} {Node degree.} #' \item{cc} {Node clustering coefficient.} -#' \item{ctr} {Node centrality measure: (type\*CC+1)\*deg.} +#' \item{ctr} {Node centrality measure: (type\*CC+1)\*deg, or CC\*deg if type is negative.} #' \item{clustNo} {Cluster number.} #' \item {iscenter} {1 for the node was chosen as the cluster's center, 0 otherwise.} #' \item {intEdges} {Number of edges from the node to nodes in the same cluster.} @@ -344,6 +345,8 @@ graphComponents <- function(A, minCtr=5, type=1) { deg <- Matrix::rowSums(A) CC <- clusteringCoef(A) ctrs <- (type*CC+1)*deg + if (type < 0) + ctrs <- CC*deg clustersInfo <- data.frame(labels=labels, degree=deg, cc=CC, ctr=ctrs, clustNo=rep(0,Vn), iscenter=rep(0,Vn), intEdges=rep(0,Vn), extEdges=rep(0,Vn), @@ -362,7 +365,11 @@ graphComponents <- function(A, minCtr=5, type=1) { clustersInfo$iscenter[ctrnode] <- 1 clustersInfo$clustNo[union(ctrnode,nbrs)] <- clustNo clustersInfo$intEdges[nbrs] <- Matrix::rowSums(A[nbrs,nbrs]) - clustersInfo$extEdges[nbrs] <- Matrix::rowSums(A[nbrs,-nbrs]) + if (length(nbrs) < ncol(A)) { + clustersInfo$extEdges[nbrs] <- Matrix::rowSums(A[nbrs,-nbrs]) + } else { + clustersInfo$extEdges[nbrs] <- 0 + } for (i in 1:length(nbrs)) { clustersInfo$distCenter[nbrs[i]] <- mean(xor(A[ctrnode,], A[nbrs[i],])) } @@ -400,11 +407,13 @@ summarizeClusters <- function(clustersInfo) { cat("Num of unclustered nodes:", length(which(clustersInfo$clustNo == 0)),"\n") percentInCluster <- clustersInfo$intEdges/clustersInfo$degree percentInCluster[which(clustersInfo$degree == 0)] <- 0 + if (max(clustersInfo$clustNo) == 0) + return(NULL) tab <- matrix(0,nrow=max(clustersInfo$clustNo),ncol=12) for (cnum in 1:max(clustersInfo$clustNo)) { tmpclusterInfo <- clustersInfo[which(clustersInfo$clustNo == cnum),] tab[cnum,] <- c(cnum,nrow(tmpclusterInfo), fivenum(tmpclusterInfo$degree), - fivenum(percentInCluster[which(clustersInfo$clustNo == cnum)])) + fivenum(percentInCluster[which(clustersInfo$clustNo == cnum)])) } colnames(tab) <- c("Cluster","Nodes","degreeMin","degreeQ25","degreeMedian", "degreeQ75","degreeMax","pctInClstMin","pctInClstQ25", @@ -434,7 +443,9 @@ collapsedGraph <- function(A, clustersInfo) { collA <- Matrix::Matrix(0, ncol=collDim, nrow=collDim) inCluster <- which(clustersInfo$clustNo > 0) notInCluster <- which(clustersInfo$clustNo == 0) - collA[1:length(notInCluster), 1:length(notInCluster)] <- A[notInCluster, notInCluster]>0 + if (length(notInCluster) > 0) { + collA[1:length(notInCluster), 1:length(notInCluster)] <- A[notInCluster, notInCluster]>0 + } if (length(rownames(A)) != nrow(A)) { rownames(A) <- 1:nrow(A) } @@ -442,8 +453,11 @@ collapsedGraph <- function(A, clustersInfo) { paste0("CLS",1:max(clustersInfo$clustNo))) for (i in 1:max(clustersInfo$clustNo)) { Ci <- which(clustersInfo$clustNo == i) - collA[i+length(notInCluster),1:length(notInCluster)] <- - Matrix::rowSums(A[notInCluster,which(clustersInfo$clustNo==i)]) + if (length(notInCluster) > 0) { + Atmp <- matrix(A[notInCluster,which(clustersInfo$clustNo==i)], + nrow=length(notInCluster), ncol=length(which(clustersInfo$clustNo==i))) + collA[i+length(notInCluster),1:length(notInCluster)] <- Matrix::rowSums(Atmp) + } if (i < max(clustersInfo$clustNo)) { for (j in (i+1):max(clustersInfo$clustNo)) { Cj <- which(clustersInfo$clustNo == j) @@ -455,6 +469,7 @@ collapsedGraph <- function(A, clustersInfo) { } + #' Calculate the clustering coefficient of each node. #' #' @param A an adjacency Matrix (0/1). @@ -587,6 +602,8 @@ plotBitmapCC <- function(AdjMat, clusterInfo=NULL, orderByCluster=FALSE, showMin #' @param clustNo The chosen cluster. #' @param clusterInfo Obtained from graphComponents. #' @param labels If set to TRUE, show node names (default=FALSE). +#' @param nodecol The color(s) of the nodes. Can be a single value or a vector of length equal to the number of rows in AdjMat +#' @param labelsize Text size of node labels. #' @export #' @examples #' \donttest{ @@ -595,9 +612,11 @@ plotBitmapCC <- function(AdjMat, clusterInfo=NULL, orderByCluster=FALSE, showMin #' WTComp <- graphComponents(WTres$AdjMat) #' plotCluster(WTres$AdjMat, 5, WTComp) #' } -plotCluster <- function(AdjMat, clustNo, clusterInfo=NULL, labels=FALSE) { +plotCluster <- function(AdjMat, clustNo, clusterInfo=NULL, labels=FALSE, nodecol="blue",labelsize=1) { if(is.null(clusterInfo)) clusterInfo <- graphComponents(AdjMat) + if(length(nodecol) < nrow(AdjMat)) + nodecol <- rep(nodecol[1],length=nrow(AdjMat)) ids <- which(clusterInfo$clustNo == clustNo) if (length(ids) > 0) { tmpA <- AdjMat[ids,ids] @@ -612,8 +631,9 @@ plotCluster <- function(AdjMat, clustNo, clusterInfo=NULL, labels=FALSE) { sizes <- pmax(0.3,tmpclusterInfo$degree/max(tmpclusterInfo$degree)) opacity <- 0.25+tmpclusterInfo$intEdges/tmpclusterInfo$degree opacity <- opacity/max(opacity) + nodecol <- rgb(t(col2rgb(nodecol)/255),alpha=opacity)[ids] plot(rads*cos(thetas), rads*sin(thetas),cex=sizes*3, pch=19,axes=F, - xlab="",ylab="",col=rgb(red = 0, green = 0, blue = 1, alpha = opacity)) + xlab="",ylab="",col=nodecol) for (i in 1:ncol(tmpA)) { nbrs <- setdiff(which(tmpA[i,] == 1), 1:i) if(length(nbrs) > 0) { @@ -624,10 +644,9 @@ plotCluster <- function(AdjMat, clustNo, clusterInfo=NULL, labels=FALSE) { } } } - points(rads*cos(thetas), rads*sin(thetas),cex=sizes*3, pch=19, - col=rgb(red = 0, green = 0, blue = 1, alpha = opacity)) + points(rads*cos(thetas), rads*sin(thetas),cex=sizes*3, pch=19, col=nodecol) if (labels) - text(rads*cos(thetas), rads*sin(thetas), tmpclusterInfo$labels, pos=3) + text(rads*cos(thetas), rads*sin(thetas), tmpclusterInfo$labels, pos=3, cex=labelsize) ctr <- which(tmpclusterInfo$iscenter==1) points(rads[ctr]*cos(thetas[ctr]), rads[ctr]*sin(thetas[ctr]),pch=21, cex=sizes[ctr]*3, col="black",lwd=2) diff --git a/man/graphComponents.Rd b/man/graphComponents.Rd index 041e1d4..33e1881 100644 --- a/man/graphComponents.Rd +++ b/man/graphComponents.Rd @@ -19,7 +19,7 @@ A data frame with the following columns \item{labels} {Node label (e.g. gene names).} \item{degree} {Node degree.} \item{cc} {Node clustering coefficient.} -\item{ctr} {Node centrality measure: (type\*CC+1)\*deg.} +\item{ctr} {Node centrality measure: (type\*CC+1)\*deg, or CC\*deg if type is negative.} \item{clustNo} {Cluster number.} \item {iscenter} {1 for the node was chosen as the cluster's center, 0 otherwise.} \item {intEdges} {Number of edges from the node to nodes in the same cluster.} @@ -28,7 +28,7 @@ A data frame with the following columns } } \description{ -Take an adjacency Matrix as input and find clusters. For each node, find the degree and clustering coefficient (CC). Then, calculate a centrality measure (type\*CC+1)\*deg. For type=0, it's just the degree. Note that setting type=1 we assign a higher value to nodes that not only have many neighbors, but the neighbors are highly interconnected. For example, suppose we have two components with k nodes, one has a star shape, and the other is a complete graph. With type=0 both graphs will get the same value, but with type=1 the complete graph will be picked by the algorithm first. +Take an adjacency Matrix as input and find clusters. For each node, find the degree and clustering coefficient (CC). Then, calculate a centrality measure (type\*CC+1)\*deg. For type=0, it's just the degree. Note that setting type=1 we assign a higher value to nodes that not only have many neighbors, but the neighbors are highly interconnected. For example, suppose we have two components with k nodes, one has a star shape, and the other is a complete graph. With type=0 both graphs will get the same value, but with type=1 the complete graph will be picked by the algorithm first. Setting type to a negative value gives CC\*deg as the centrality measure. } \examples{ \donttest{ diff --git a/man/plotCluster.Rd b/man/plotCluster.Rd index 4e596e6..4d148bb 100644 --- a/man/plotCluster.Rd +++ b/man/plotCluster.Rd @@ -4,7 +4,14 @@ \alias{plotCluster} \title{Plot cluster network} \usage{ -plotCluster(AdjMat, clustNo, clusterInfo = NULL, labels = FALSE) +plotCluster( + AdjMat, + clustNo, + clusterInfo = NULL, + labels = FALSE, + nodecol = "blue", + labelsize = 1 +) } \arguments{ \item{AdjMat}{An adjacency Matrix (0/1).} @@ -14,6 +21,10 @@ plotCluster(AdjMat, clustNo, clusterInfo = NULL, labels = FALSE) \item{clusterInfo}{Obtained from graphComponents.} \item{labels}{If set to TRUE, show node names (default=FALSE).} + +\item{nodecol}{The color(s) of the nodes. Can be a single value or a vector of length equal to the number of rows in AdjMat} + +\item{labelsize}{Text size of node labels.} } \description{ Plot a cluster network with all the nodes and edges - the central node is marked by a black circle. The radius of each point corresponds to its degree. The opacity corresponds to the percentage of edges from the node that is in the cluster (the darker it is, the larger the percentage of edges is within the cluster.) The distance from the center corresponds to the relative dissimilarity with the central node. This is computed as the number of neighbors the node and the central node do not have in common.