library(GEOquery)

# Set libcurl as the preferred method to use when downloading files
options(download.file.method.GEOquery = "libcurl")

# Build a data frame holding the expression values of a GEO series together
# with the gene symbol annotated for each probe.
#
# Args:
#   gse_name:        GEO series accession passed to getGEO(), e.g. "GSE52253".
#   symbol_col_name: Name of the feature-annotation column that holds the gene
#                    symbols.
#
# Returns:
#   A data.frame with a `probe_id` column, one column per column of the
#   expression matrix, and a `symbol` column. Every expression row is kept
#   (left join); probes without a matching annotation row get NA as symbol.
#
# Raises:
#   An error when the feature annotation lacks `symbol_col_name` or the
#   `Probe_Id` column.
get_gse_expression_set <- function(gse_name, symbol_col_name) {
  # getGEO() returns a list; only its first element is used here.
  gse <- getGEO(gse_name, GSEMatrix = TRUE)[[1]]

  # Expression matrix as a data frame, with the row names exposed as a column
  # so they can serve as the merge key.
  gse_df <- as.data.frame(exprs(gse))
  gse_df$probe_id <- rownames(gse_df)

  # Feature (probe-level) annotation table of the expression set.
  feature_table <- fData(gse)

  # Fail early with a readable message instead of an opaque subsetting error.
  required_cols <- c(symbol_col_name, "Probe_Id")
  missing_cols <- setdiff(required_cols, colnames(feature_table))
  if (length(missing_cols) > 0) {
    stop(
      "Feature data of ", gse_name, " has no column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Lookup table probe ID -> gene symbol.
  # NOTE(review): assumes the `Probe_Id` values match the row names of the
  # expression matrix for this platform - confirm for other series.
  symbol_df <- data.frame(
    symbol = feature_table[[symbol_col_name]],
    probe_id = as.character(feature_table[["Probe_Id"]]),
    stringsAsFactors = FALSE
  )

  # Left join: keep every expression row even when no symbol is annotated.
  merge(x = gse_df, y = symbol_df, by = "probe_id", all.x = TRUE)
}

base_path <- "./" # where to put the resulting file
gse_name <- "GSE52253"
symbol_col_name <- "Symbol"

# Use the configured column name (the original passed the misspelled literal
# 'Symo', which is not the column name defined above).
my_data <- get_gse_expression_set(gse_name, symbol_col_name)

# NOTE: the file keeps its ".csv" extension although fields are tab-separated;
# the name is left unchanged so existing consumers of the file still find it.
write.table(
  my_data,
  file = paste0(base_path, gse_name, ".csv"),
  sep = "\t",
  row.names = FALSE
)