Skip to content
Permalink
master
Go to file
 
 
Cannot retrieve contributors at this time
47 lines (39 sloc) 1.86 KB
library(dplyr)
library(data.table)
# Constants
# Directory prefix prepended to the input and output paths built by the
# top-level script code. It is joined by plain string concatenation, so it
# must end with a path separator.
base_path <- './'
# Functions
# Read a tab-separated gene list and return its symbols in lowercase.
#
# Arguments:
#   gene_file_name  Name of a file inside '<base_path>human_mouse_gene_lists/'.
#                   The file must have a header row with a 'Symbol' column.
#   base_path       Directory prefix the 'human_mouse_gene_lists/' folder lives
#                   under. It is concatenated as-is, so it must end with a path
#                   separator. Defaults to './' (the previous hard-coded value).
#
# Returns:
#   A character vector: the lowercased 'Symbol' values in file order, followed
#   by the two extra entries 'control' and 'class'.
#
# Stops with an error if the file does not exist or has no 'Symbol' column.
get_gene_list <- function(gene_file_name, base_path = './') {
genes_list_file_path <- paste0(base_path, 'human_mouse_gene_lists/', gene_file_name)
if (!file.exists(genes_list_file_path)) {
stop('Gene list file not found: ', genes_list_file_path, call. = FALSE)
}
# stringsAsFactors is set explicitly so the column type does not depend on
# the R version's default.
genes_list_data <- read.table(
genes_list_file_path,
sep = '\t',
header = TRUE,
stringsAsFactors = FALSE
)
if (!('Symbol' %in% names(genes_list_data))) {
stop(
"Gene list file has no 'Symbol' column: ", genes_list_file_path,
call. = FALSE
)
}
gene_symbols <- tolower(genes_list_data[['Symbol']])
# 'control' and 'class' are appended so that callers filtering by this list
# also keep entries with those two names.
c(gene_symbols, 'control', 'class')
}
# Parameters
output_file_name <- 'tham_lasso_dataset.csv'

# Create gene_list
# genes_list <- get_gene_list('human_genes.csv') # Just Human data
# genes_list <- get_gene_list('new_mouse_clean.csv') # Just mouse Data
# For intersection of both mouse and human
# genes_list_human <- get_gene_list('human_genes.csv')
# genes_list_mouse <- get_gene_list('new_mouse_clean.csv')
# genes_list <- intersect(genes_list_human, genes_list_mouse)
genes_list <- get_gene_list('lasso_gene_list.csv')

# Get Tham's data
gse_file <- paste0(base_path, 'class_data/gse70559_log2ratio_data.csv')
gse_data <- read.table(gse_file, sep = '\t', header = TRUE)

# Convert the gene symbols to all lowercase BEFORE removing duplicates, so that
# symbols differing only in letter case collapse to a single row. Doing it the
# other way round leaves duplicate names that later become duplicate columns.
gse_data$TEST <- tolower(gse_data$TEST)
gse_data <- gse_data[!duplicated(gse_data$TEST), ] # Remove duplicate genes

# Select the genes (genes_list also contains 'control' and 'class')
selected_genes <- gse_data[gse_data$TEST %in% genes_list, , drop = FALSE]

# The output layout needs 'control' and 'class' columns; fail with a clear
# message instead of an "undefined columns selected" error further down.
required_rows <- c('control', 'class')
missing_rows <- setdiff(required_rows, selected_genes$TEST)
if (length(missing_rows) > 0) {
stop(
'Rows missing from the TEST column of ', gse_file, ': ',
paste(missing_rows, collapse = ', '),
call. = FALSE
)
}

# Create filtered dataset with the selected genes: one row per sample column,
# one column per gene. Only the sample columns are transposed; transposing the
# whole data frame (TEST included) would turn numeric columns into
# format()-ed strings, padding them and limiting them to 7 significant digits.
sample_columns <- setdiff(names(selected_genes), 'TEST')
sample_values <- t(as.matrix(selected_genes[, sample_columns, drop = FALSE]))
colnames(sample_values) <- selected_genes$TEST
final_data_set <- data.frame(
test = rownames(sample_values),
sample_values,
check.names = FALSE,
stringsAsFactors = FALSE
)

# Put the identifier and the two label columns first, genes afterwards.
leading_columns <- c('test', 'control', 'class')
gene_columns <- setdiff(colnames(final_data_set), leading_columns)
final_data_set <- final_data_set[, c(leading_columns, gene_columns)]

output_file_path <- paste0(base_path, output_file_name)
write.table(final_data_set, file = output_file_path, row.names = FALSE, sep = ',', quote = FALSE)
You can’t perform that action at this time.