Skip to content
Permalink
Newer
Older
100644 32 lines (27 sloc) 1.27 KB
1
library(GEOquery)
2
# Set libcurl as the preferred method to use when downloading files
3
options(download.file.method.GEOquery='libcurl')
4
5
#' Get the expression values of a GEO series together with gene symbols
#'
#' Fetches the series with `getGEO()`, turns the expression matrix of the
#' first returned element into a data frame and left-joins the gene symbol of
#' each probe onto it. Only the first element of the `getGEO()` result is
#' used.
#'
#' @param gse_name GEO series accession, e.g. "GSE52253".
#' @param symbol_col_name Name of the feature-data column that holds the gene
#'   symbols.
#' @return A data frame with a `probe_id` column, the columns of the
#'   expression matrix, and a `symbol` column. Probes of the expression
#'   matrix that have no matching feature row get `NA` as symbol.
get_gse_expression_set <- function(gse_name, symbol_col_name) {
  # Get the GSE by name and create a data frame out of the expression set
  gse <- getGEO(gse_name, GSEMatrix = TRUE)[[1]]
  gse_df <- as.data.frame(exprs(gse))
  gse_df$probe_id <- rownames(gse_df)

  # Feature annotation: one row per probe
  feature_df <- fData(gse)

  # Fail early with a readable message instead of the generic
  # "undefined columns selected" raised by the subsetting below.
  if (is.character(symbol_col_name) &&
        !all(symbol_col_name %in% colnames(feature_df))) {
    stop(
      "Feature data of ", gse_name, " has no column '",
      paste(symbol_col_name, collapse = "', '"), "'. Available columns: ",
      paste(colnames(feature_df), collapse = ", "),
      call. = FALSE
    )
  }

  # Prefer the explicit Probe_Id column; when the feature data does not have
  # one, fall back to the feature row names.
  probe_ids <- feature_df[["Probe_Id"]]
  if (is.null(probe_ids)) {
    probe_ids <- rownames(feature_df)
  }

  # Data frame holding the symbols and their probe IDs
  symbol_df <- data.frame(
    symbol = feature_df[, symbol_col_name],
    probe_id = probe_ids,
    stringsAsFactors = FALSE
  )

  # Left join so that every probe of the expression matrix is kept; returned
  # as the (visible) value of the function.
  merge(x = gse_df, y = symbol_df, by = "probe_id", all.x = TRUE)
}
27
28
# Driver: fetch one GEO series with gene symbols and write it to disk.
base_path <- "./"  # where to put the resulting file
gse_name <- "GSE52253"
symbol_col_name <- "Symbol"

# Pass the configured column name (the previous literal 'Symo' was a typo and
# left symbol_col_name unused).
my_data <- get_gse_expression_set(gse_name, symbol_col_name)

# NOTE(review): the file is tab-separated although it gets a ".csv"
# extension; both are kept as they were -- confirm which format is intended.
write.table(
  my_data,
  file = paste0(base_path, gse_name, ".csv"),
  sep = "\t",
  row.names = FALSE
)
You can’t perform that action at this time.