Skip to content
Permalink
Browse files

Got the lasso genes from Tham's data. Also analyzed the distribution of those columns.
  • Loading branch information
rjm11010 committed Dec 17, 2017
1 parent 1a09133 commit 614292327071bda4fd68b169f7fcb56aa406f643
Showing with 461 additions and 55 deletions.
  1. BIN analysis/lasso_gene_analysis/acpl2.png
  2. BIN analysis/lasso_gene_analysis/adamts12.png
  3. BIN analysis/lasso_gene_analysis/alcam.png
  4. BIN analysis/lasso_gene_analysis/ccna2.png
  5. BIN analysis/lasso_gene_analysis/cebpa.png
  6. BIN analysis/lasso_gene_analysis/cenpf.png
  7. BIN analysis/lasso_gene_analysis/clec14a.png
  8. BIN analysis/lasso_gene_analysis/copz2.png
  9. BIN analysis/lasso_gene_analysis/cyp27a1.png
  10. BIN analysis/lasso_gene_analysis/ets2.png
  11. BIN analysis/lasso_gene_analysis/flnc.png
  12. BIN analysis/lasso_gene_analysis/fn1.png
  13. BIN analysis/lasso_gene_analysis/gja4.png
  14. BIN analysis/lasso_gene_analysis/glis2.png
  15. BIN analysis/lasso_gene_analysis/gna14.png
  16. BIN analysis/lasso_gene_analysis/gtf2h5.png
  17. BIN analysis/lasso_gene_analysis/gyg1.png
  18. BIN analysis/lasso_gene_analysis/hspb7.png
  19. BIN analysis/lasso_gene_analysis/il6.png
  20. BIN analysis/lasso_gene_analysis/mn1.png
  21. BIN analysis/lasso_gene_analysis/pdgfc.png
  22. BIN analysis/lasso_gene_analysis/pdk4.png
  23. BIN analysis/lasso_gene_analysis/pla2g4a.png
  24. BIN analysis/lasso_gene_analysis/rbp1.png
  25. BIN analysis/lasso_gene_analysis/rnd3.png
  26. BIN analysis/lasso_gene_analysis/s100a16.png
  27. BIN analysis/lasso_gene_analysis/sdc4.png
  28. BIN analysis/lasso_gene_analysis/serping1.png
  29. BIN analysis/lasso_gene_analysis/slc16a3.png
  30. BIN analysis/lasso_gene_analysis/sparcl1.png
  31. BIN analysis/lasso_gene_analysis/st3gal4.png
  32. BIN analysis/lasso_gene_analysis/tbxas1.png
  33. BIN analysis/lasso_gene_analysis/tmem37.png
  34. BIN analysis/lasso_gene_analysis/zbp1.png
  35. BIN analysis/odd_ones/cxcl16.png
  36. BIN analysis/odd_ones/odd_adam12.png
  37. BIN analysis/odd_ones/odd_aldh2.png
  38. BIN analysis/odd_ones/odd_bnip3.png
  39. BIN analysis/odd_ones/odd_cxcl2.png
  40. BIN analysis/odd_ones/odd_hhip.png
  41. BIN analysis/odd_ones/odd_lgals3.png
  42. BIN analysis/odd_ones/odd_ndrg2.png
  43. BIN analysis/odd_ones/odd_pla1a.png
  44. BIN analysis/odd_ones/odd_pla2g4a.png
  45. BIN analysis/odd_ones/odd_rbp4.png
  46. BIN analysis/odd_ones/odd_slc9a3r1.png
  47. +0 −40 mkdataset/datasets/gan_datasets/notes.md
  48. +366 −0 mkdataset/datasets/gan_datasets/tham_lasso_dataset.csv
  49. +54 −0 mkdataset/distribution_analysis.R
  50. +36 −0 mkdataset/human_mouse_gene_lists/lasso_gene_list.csv
  51. +5 −15 mkdataset/prep_thams_data.r
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

This file was deleted.

Large diffs are not rendered by default.

@@ -0,0 +1,54 @@
library(ggplot2)

#############################
# Load Data
#############################

# Directory that every relative input path below is resolved against.
# It already ends in '/', so the pieces are joined by plain concatenation.
base_path <- '/home/reynaldo/Documents/School/Fall2017/DataMining/grp_proj/mkdataset/'

# Table whose columns are plotted further down in this script.
file_name <- 'datasets/gan_datasets/tham_lasso_dataset.csv'
all_data <- read.csv(paste0(base_path, file_name), header=TRUE)

# Gene list; its `Symbol` column supplies the column names to look up.
gene_list_file_name <- 'human_mouse_gene_lists/lasso_gene_list.csv'
gene_list <- read.csv(paste0(base_path, gene_list_file_name), header=TRUE)



################################
# Functions
################################

# Function from https://stackoverflow.com/questions/22742737/function-to-save-ggplot
# Special Thanks to enricoferrero
# https://stackoverflow.com/users/1540663/enricoferrero
# Render a plot object into '<file_name>.png' (732 x 543 pixels).
#
# Args:
#   myPlot:    object that draws itself when print()ed (e.g. a ggplot).
#   file_name: output path WITHOUT the '.png' extension; it is appended here.
#
# Returns:
#   The value of dev.off(), exactly as before.
savePlot <- function(myPlot, file_name) {
  png(paste0(file_name, '.png'), width=732, height=543)

  # If print() raises, the PNG device must still be closed; otherwise it
  # stays open and remains the current device for whatever is plotted next.
  device_open <- TRUE
  on.exit(if (device_open) dev.off(), add=TRUE)

  print(myPlot)

  # Normal path: close the device ourselves so the return value is unchanged,
  # and tell the exit handler there is nothing left to clean up.
  device_open <- FALSE
  dev.off()
}

################################
# Variables
################################

# Vector of fields to examine
# columns_to_exclude <- c('test', 'class')
# fields <- colnames(all_data)[match(columns_to_exclude, colnames(all_data))]


# For every gene symbol that is also a column of `all_data`, draw a histogram
# of that column with a red vertical line at its mean, show it on screen,
# save it as '<symbol>.png' and wait for the user before moving on.
#
# as.character() makes the iteration explicit: `Symbol` may have been read as
# a factor, and the loop needs the symbol strings, not factor codes.
for (field in as.character(gene_list$Symbol)) {
  # Symbols without a matching column are skipped.
  if (!(field %in% colnames(all_data))) {
    next
  }

  the_title <- paste('Expression Histogram of', field)
  x_lab <- paste(field, 'Expression Value')

  # Ignore missing values so a single NA does not turn the mean marker into NA.
  field_mean <- mean(all_data[[field]], na.rm=TRUE)

  the_plot <- ggplot(all_data, aes(x=all_data[[field]])) +
    geom_histogram() +
    geom_vline(xintercept=field_mean, color='red', size=2) +
    labs(title=the_title, x=x_lab)

  # Once on the active device for inspection, once into the PNG file.
  print(the_plot)
  savePlot(the_plot, field)

  # Pause until ready for next
  readline("Press Enter for next")
}


@@ -0,0 +1,36 @@
Symbol
gtf2h5
cebpa
gna14
copz2
slc16a3
glis2
ets2
gyg1
ccna2
rnd3
serping1
sdc4
zbp1
flnc
mn1
pla2g4a
cenpf
sparcl1
adamts12
pdgfc
s100a16
il6
pdk4
hspb7
gja4
clec14a
rbp1
st3gal4
cyp27a1
tmem37
fn1
acpl2
tbxas1
alcam
(34)
@@ -14,15 +14,15 @@ get_gene_list <- function(gene_file_name) {
}

# Parameters
selected_species = c('human', 'mouse')
output_file_name = 'tham_human_and_mouse_dataset.csv'
output_file_name = 'tham_lasso_dataset.csv'
# Create gene_list
# genes_list <- get_gene_list('human_genes.csv') # Just Human data
# genes_list <- get_gene_list('new_mouse_clean.csv') # Just mouse Data
# For intersection of both mouse and human
genes_list_human <- get_gene_list('human_genes.csv')
genes_list_mouse <- get_gene_list('new_mouse_clean.csv')
genes_list <- intersect(genes_list_human, genes_list_mouse)
# genes_list_human <- get_gene_list('human_genes.csv')
# genes_list_mouse <- get_gene_list('new_mouse_clean.csv')
# genes_list <- intersect(genes_list_human, genes_list_mouse)
genes_list <- get_gene_list('lasso_gene_list.csv')

# Get Tham's data
gse_file <- paste(base_path, 'class_data/gse70559_log2ratio_data.csv', sep='')
@@ -34,16 +34,6 @@ gse_data$TEST <- tolower(gse_data$TEST)
# Select the genes
selected_genes <- as.data.frame(filter(gse_data, TEST %in% genes_list))

# Average selected genes
# Remove the class and control rows
# classes_row <- as.data.frame(selected_genes[1, ])
# control_row <- as.data.frame(selected_genes[2, ])
# selected_genes <- selected_genes[c(-1, -2), ]
# non_symbol_columns <- colnames(selected_genes)[-1]
# selected_avg_genes <- aggregate(selected_genes[ , non_symbol_columns],
# by=list(selected_genes$TEST), data=selected_genes, FUN = mean)
# colnames(selected_avg_genes) <- c('symbol', non_symbol_columns)
# Add the class and control row back

# Create Filtered dataset with select genes
final_data_set <- t(selected_genes)

0 comments on commit 6142923

Please sign in to comment.
You can’t perform that action at this time.