Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
data_mining_gan/mkdataset/get_gse_data.r
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
32 lines (27 sloc)
1.27 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(GEOquery)

# Set libcurl as the preferred method for GEOquery to use when downloading
# files.
options(download.file.method.GEOquery = "libcurl")
# Get the expression values of a GEO series together with the gene symbol of
# each probe.
#
# Arguments:
#   gse_name         GEO series accession handed to getGEO(), e.g. "GSE52253".
#   symbol_col_name  Name of the feature-data column holding the gene symbols.
#
# Returns:
#   A data frame containing a `probe_id` column, the columns of the expression
#   matrix, and a `symbol` column. The merge is a left join on `probe_id`, so
#   every expression row is kept and `symbol` is NA where no feature-data row
#   matches.
#
# Stops with an explicit message when the feature data lacks either the
# requested symbol column or the `Probe_Id` column used as the join key.
get_gse_expression_set <- function(gse_name, symbol_col_name) {
  # getGEO() is indexed with [[1]]: only the first element it returns is used.
  gse <- getGEO(gse_name, GSEMatrix = TRUE)[[1]]

  # Expression matrix as a data frame, with the row names exposed as a column
  # so they can serve as the join key.
  gse_df <- as.data.frame(exprs(gse))
  gse_df$probe_id <- rownames(gse_df)

  # Feature annotation table (the `data` slot of the featureData object).
  feature_df <- fData(gse)

  # Fail early and clearly instead of with "undefined columns selected" or a
  # merge() error further down.
  missing_cols <- setdiff(
    c(symbol_col_name, "Probe_Id"),
    colnames(feature_df)
  )
  if (length(missing_cols) > 0) {
    stop(
      "Feature data of ", gse_name, " has no column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Lookup table probe ID -> gene symbol. stringsAsFactors = FALSE keeps the
  # column types exactly as they are stored in the feature data.
  symbol_df <- data.frame(
    symbol = feature_df[[symbol_col_name]],
    probe_id = feature_df[["Probe_Id"]],
    stringsAsFactors = FALSE
  )

  # Left join: keep every expression row, attach the symbol where available.
  merge(x = gse_df, y = symbol_df, by = "probe_id", all.x = TRUE)
}
# Script parameters.
base_path <- "./" # where to put the resulting file
gse_name <- "GSE52253"
symbol_col_name <- "Symbol"

# Use the configured column name; the previous literal 'Symo' does not match
# the `symbol_col_name` defined above and made the column lookup fail.
my_data <- get_gse_expression_set(gse_name, symbol_col_name)

# NOTE(review): the output is tab-separated although the file name ends in
# ".csv"; both are kept as they were so existing consumers are unaffected.
write.table(
  my_data,
  file = paste0(base_path, gse_name, ".csv"),
  sep = "\t",
  row.names = FALSE
)