diff --git a/big_data_analsis.R b/big_data_analsis.R deleted file mode 100644 index 2b2ccfa..0000000 --- a/big_data_analsis.R +++ /dev/null @@ -1,26 +0,0 @@ -# Plot the data - -# Imports -library(reshape2) -library(ggplot2) - -# Load the data -# Change path to file -# data_path <- '../parallel/analysis.csv' # You can use this instead -data_path <- '/home/reynaldo/Documents/School/Spring2017/HPC/project/parallel_mcmc/parallel/complete_big_analysis.csv' -perf_data <- read.csv(file=data_path, header=TRUE, sep=",") -melted_data <- melt(perf_data, id=c("trial", "block", "thread")) -five_hundred_trials <- melted_data[which(melted_data$trial == 500), ] -five_hundred_trials$thread = as.factor(five_hundred_trials$thread) - - -# Find which combination of blocks and threads works best -ours_five_hundred_trials <- five_hundred_trials[which(five_hundred_trials$variable == "our_time"), ] -ours_five_hundred_trials <- ours_five_hundred_trials[which(!is.na(ours_five_hundred_trials$thread)), ] - -line_base <- ggplot(ours_five_hundred_trials, aes(x=block, y=value, group=thread, shape=thread, color=thread)) + - geom_line() + - geom_point() -line_base <- line_base + xlab("Blocks") + ylab("Time (seconds)") + labs(colour = "Num Threads", shape="Num Threads") -line_base <- line_base + ggtitle("Blocks vs Time vs Threads") + theme(plot.title = element_text(hjust = 0.5)) -line_base \ No newline at end of file diff --git a/generate_plots/big_data_analsis.R b/generate_plots/big_data_analsis.R new file mode 100644 index 0000000..073f936 --- /dev/null +++ b/generate_plots/big_data_analsis.R @@ -0,0 +1,74 @@ +# Plot the data + +# Imports +library(reshape2) +library(ggplot2) + +# Load the data +# Set data_path to the location of the CSV on your machine +# data_path <- '../parallel/analysis.csv' # You can use this instead +data_path <- '/home/reynaldo/Documents/School/Spring2017/HPC/project/parallel_mcmc/parallel/complete_big_analysis.csv' +perf_data <- read.csv(file=data_path, header=TRUE, sep=",") +melted_data <- melt(perf_data,
id=c("trial", "block", "thread")) +five_hundred_trials <- melted_data[which(melted_data$trial == 500), ] + + +# Find which combination of blocks and threads works best +five_hundred_trials$thread <- as.factor(five_hundred_trials$thread) + +ours_five_hundred_trials <- five_hundred_trials[which(five_hundred_trials$variable == "our_time"), ] +ours_five_hundred_trials <- ours_five_hundred_trials[which(!is.na(ours_five_hundred_trials$thread)), ] + +line_base <- ggplot(ours_five_hundred_trials, aes(x=block, y=value, group=thread, shape=thread, color=thread)) + + geom_line() + + geom_point() +line_base <- line_base + xlab("Blocks") + ylab("Time (seconds)") + labs(colour = "Num Threads", shape="Num Threads") +line_base <- line_base + ggtitle("Blocks vs Time vs Threads") + theme(plot.title = element_text(hjust = 0.5)) +line_base + +# Blocks vs time for every implementation, restricted to rows with thread == 1 +five_hundred_trials$thread <- as.factor(five_hundred_trials$thread) +one_threads_500_trials <- five_hundred_trials[which(five_hundred_trials$thread == 1), ] + +line_base <- ggplot(one_threads_500_trials, aes(x=block, y=value, group=variable, shape=variable, color=variable)) + + geom_line() + + geom_point() +line_base <- line_base + xlab("Blocks") + ylab("Time (seconds)") + labs(colour = "Implementations", shape="Implementations") +line_base <- line_base + ggtitle("Blocks vs Time vs Implementation") + theme(plot.title = element_text(hjust = 0.5)) +line_base + + +# Find which combination of blocks and threads works best (same line plot as the first section) +five_hundred_trials$thread <- as.factor(five_hundred_trials$thread) +ours_five_hundred_trials <- five_hundred_trials[which(five_hundred_trials$variable == "our_time"), ] +ours_five_hundred_trials <- ours_five_hundred_trials[which(!is.na(ours_five_hundred_trials$thread)), ] + +line_base <- ggplot(ours_five_hundred_trials, aes(x=block, y=value, group=thread, shape=thread, color=thread)) + + geom_line() + + geom_point() +line_base <- line_base + xlab("Blocks") +
ylab("Time (seconds)") + labs(colour = "Num Threads", shape="Num Threads") +line_base <- line_base + ggtitle("Blocks vs Time vs Threads") + theme(plot.title = element_text(hjust = 0.5)) +line_base + + +# Scatter plot of blocks (x) vs threads (y); point size encodes the measured time +five_hundred_trials$block <- as.factor(five_hundred_trials$block) + +base_plot <- ggplot(five_hundred_trials, aes(x=block, y=thread, color=variable)) +base_plot <- base_plot + geom_point(aes(x=block, y=thread, size=value, color=variable)) +base_plot <- base_plot + scale_size(range = c(0, 10)) +base_plot <- base_plot + xlab("Blocks") + ylab("Threads") + labs(colour = "Implementation", size="Time") +base_plot <- base_plot + ggtitle("Blocks vs Time vs Implementation") + theme(plot.title = element_text(hjust = 0.5)) +base_plot + + +# Bar chart of our_time per thread count at block 1; thread is a factor by now, so it is turned back into numbers for the < 64 test +ours_five_hundred_trials <- five_hundred_trials[which(five_hundred_trials$variable == "our_time" & five_hundred_trials$block ==1 & as.numeric(as.character(five_hundred_trials$thread)) < 64 ), ] +ours_five_hundred_trials$thread <- as.factor(ours_five_hundred_trials$thread) +ours_five_hundred_trials <- ours_five_hundred_trials[which(!is.na(ours_five_hundred_trials$thread)), ] + +bar_base <- ggplot(ours_five_hundred_trials, aes(x=thread, y=value, fill=thread)) + geom_col(width=0.5) +bar_base <- bar_base + xlab("Threads") + ylab("Time (seconds)") +bar_base <- bar_base + ggtitle("Big Data Threads vs Time (1 Block)") + theme(plot.title = element_text(hjust = 0.5)) +bar_base <- bar_base + theme(legend.position="none") +bar_base diff --git a/data_analysis.R b/generate_plots/data_analysis.R similarity index 71% rename from data_analysis.R rename to generate_plots/data_analysis.R index 390e2b6..37bef7d 100644 --- a/data_analysis.R +++ b/generate_plots/data_analysis.R @@ -6,7 +6,8 @@ library(ggplot2) # Load the data # Change path to file -# data_path <- '../parallel/analysis.csv' # You can use this instead +# data_path <- '../parallel/complete_analysis.csv' # You can use this
instead +# data_path <- '/home/reynaldo/Documents/School/Spring2017/HPC/project/parallel_mcmc/parallel/complete_analysisSmallFast.csv' data_path <- '/home/reynaldo/Documents/School/Spring2017/HPC/project/parallel_mcmc/parallel/complete_analysis.csv' perf_data <- read.csv(file=data_path, header=TRUE, sep=",") melted_data <- melt(perf_data, id=c("trial", "block", "thread")) @@ -20,7 +21,7 @@ five_hundred_trials$thread = as.factor(five_hundred_trials$thread) base_plot <- ggplot(five_hundred_trials, aes(x=block, y=time, color=variable)) base_plot <- base_plot + geom_point(aes(x=block, y=thread, size=value, color=variable)) base_plot <- base_plot + scale_size(range = c(0, 10)) -base_plot <- base_plot + xlab("Threads") + ylab("Time (seconds)") +base_plot <- base_plot + xlab("Blocks") + ylab("Threads") + labs(colour = "Implementation", size="Time") base_plot <- base_plot + ggtitle("Blocks vs Time") + theme(plot.title = element_text(hjust = 0.5)) base_plot @@ -47,4 +48,12 @@ line_base <- line_base + ggtitle("Blocks vs Time vs Implementation") + theme(plo line_base +# Trials vs Time vs Implementation +one_thread_one_block <- melted_data[which(melted_data$thread == 1 & melted_data$block == 1), ] +line_base <- ggplot(one_thread_one_block, aes(x=trial, y=value, group=variable, shape=variable, color=variable)) + + geom_line() + + geom_point() +line_base <- line_base + xlab("Trials") + ylab("Time (seconds)") + labs(colour = "Implementations", shape="Implementations") +line_base <- line_base + ggtitle("Small Data Trials vs Time 1 Block 1 Thread") + theme(plot.title = element_text(hjust = 0.5)) +line_base diff --git a/generate_plots/fast_data_analysis.R b/generate_plots/fast_data_analysis.R new file mode 100644 index 0000000..c05e3e3 --- /dev/null +++ b/generate_plots/fast_data_analysis.R @@ -0,0 +1,71 @@ +# Plot the data + +# Imports +library(reshape2) +library(ggplot2) + +# Load the data +# Change path to file +# data_path <- '../parallel/analysis.csv' # You can use this instead
+data_path <- '/home/reynaldo/Documents/School/Spring2017/HPC/project/parallel_mcmc/parallel/complete_analysis.csv' +perf_data <- read.csv(file=data_path, header=TRUE, sep=",") +melted_data <- melt(perf_data, id=c("trial", "block", "thread")) +five_hundred_trials <- melted_data[which(melted_data$trial == 500), ] +five_hundred_trials$thread <- as.factor(five_hundred_trials$thread) + +data_path <- '/home/reynaldo/Documents/School/Spring2017/HPC/project/parallel_mcmc/parallel/complete_analysisSmallFast.csv' +fast_perf_data <- read.csv(file=data_path, header=TRUE, sep=",") +fast_melted_data <- melt(fast_perf_data, id=c("trial", "block", "thread")) +fast_five_hundred_trials <- fast_melted_data[which(fast_melted_data$trial == 500), ] +fast_five_hundred_trials$thread <- as.factor(fast_five_hundred_trials$thread) + + + +# Generate Plots + +# Scatter plot of blocks (x) vs threads (y); point size encodes the measured time +base_plot <- ggplot(fast_five_hundred_trials, aes(x=block, y=thread, color=variable)) +base_plot <- base_plot + geom_point(aes(x=block, y=thread, size=value, color=variable)) +base_plot <- base_plot + scale_size(range = c(0, 10)) +base_plot <- base_plot + xlab("Blocks") + ylab("Threads") + labs(colour = "Implementation", size="Time") +base_plot <- base_plot + ggtitle("Faster Blocks vs Time") + theme(plot.title = element_text(hjust = 0.5)) +base_plot + +# Find which combination of blocks and threads works best (the SmallFast CSV names our column our_fast_time) +fast_ours_five_hundred_trials <- fast_five_hundred_trials[which(fast_five_hundred_trials$variable == "our_fast_time"), ] +fast_ours_five_hundred_trials <- fast_ours_five_hundred_trials[which(!is.na(fast_ours_five_hundred_trials$thread)), ] + +line_base <- ggplot(fast_ours_five_hundred_trials, aes(x=block, y=value, group=thread, shape=thread, color=thread)) + + geom_line() + + geom_point() +line_base <- line_base + xlab("Blocks") + ylab("Time (seconds)") + labs(colour = "Num Threads", shape="Num Threads") +line_base <- line_base + ggtitle("Faster Blocks vs Time vs Threads") + theme(plot.title
= element_text(hjust = 0.5)) +line_base + + +# Merge two datasets: append the our_fast_time rows to the original measurements +our_fast_time_melted <- fast_melted_data[(which(fast_melted_data$variable == "our_fast_time")), ] +all_melted_data <- rbind(melted_data, our_fast_time_melted) +all_five_hundred_trials <- all_melted_data[which(all_melted_data$trial == 500), ] + + +# Blocks vs time for every implementation, restricted to rows with thread == 1 +one_threads_500_trials <- all_five_hundred_trials[which(all_five_hundred_trials$thread == 1), ] + +line_base <- ggplot(one_threads_500_trials, aes(x=block, y=value, group=variable, shape=variable, color=variable)) + + geom_line() + + geom_point() +line_base <- line_base + xlab("Blocks") + ylab("Time (seconds)") + labs(colour = "Implementations", shape="Implementations") +line_base <- line_base + ggtitle("Blocks vs Time vs Implementation") + theme(plot.title = element_text(hjust = 0.5)) +line_base + + +# Trials vs Time vs Implementation +one_thread_one_block <- all_melted_data[which(all_melted_data$thread == 1 & all_melted_data$block == 1), ] + +line_base <- ggplot(one_thread_one_block, aes(x=trial, y=value, group=variable, shape=variable, color=variable)) + + geom_line() + + geom_point() +line_base <- line_base + xlab("Trials") + ylab("Time (seconds)") + labs(colour = "Implementations", shape="Implementations") +line_base <- line_base + ggtitle("Small Data Trials vs Time 1 Block 1 Thread") + theme(plot.title = element_text(hjust = 0.5)) +line_base diff --git a/parallel/complete_analysisSmallFast.csv b/parallel/complete_analysisSmallFast.csv new file mode 100644 index 0000000..538ece3 --- /dev/null +++ b/parallel/complete_analysisSmallFast.csv @@ -0,0 +1,125 @@ +trial,block,thread,seq_time,our_fast_time,their_time +500,1,1,1.30,11.36,5.02 +500,1,2,1.25,7.97,5.03 +500,1,4,1.25,6.46,5.06 +500,1,8,1.27,5.72,4.93 +500,1,16,1.25,5.82,5.17 +500,1,32,1.26,7.64,4.96 +500,2,1,1.25,7.84,5.30 +500,2,2,1.25,6.21,4.96 +500,2,4,1.35,5.79,5.19 +500,2,8,1.25,5.98,4.95 +500,2,16,1.25,7.60,5.02
+500,2,32,1.25,15.66,5.10 +500,3,1,1.26,6.67,4.97 +500,3,2,1.31,5.84,5.29 +500,3,4,1.25,5.60,5.02 +500,3,8,1.30,6.67,5.23 +500,3,16,1.27,10.95,4.96 +500,3,32,1.27,29.33,5.11 +500,4,1,1.27,6.13,4.98 +500,4,2,1.30,6.03,5.44 +500,4,4,1.25,6.38,5.00 +500,4,8,1.29,7.56,4.97 +500,4,16,1.25,15.77,4.99 +500,4,32,1.35,48.40,5.03 +500,5,1,1.24,5.93,4.90 +500,5,2,1.29,5.66,5.27 +500,5,4,1.24,6.10,5.13 +500,5,8,1.25,9.07,4.99 +500,5,16,1.24,21.89,5.27 +500,5,32,1.25,73.22,4.97 +500,6,1,1.30,5.89,5.02 +500,6,2,1.25,5.46,4.97 +500,6,4,1.31,6.46,5.05 +500,6,8,1.25,10.91,5.06 +500,6,16,1.25,29.22,5.03 +500,6,32,1.25,102.84,5.09 +500,7,1,1.24,5.65,5.10 +500,7,2,1.25,5.62,5.19 +500,7,4,1.24,7.00,4.93 +500,7,8,1.23,13.18,5.05 +500,7,16,1.31,38.55,4.92 +500,7,32,1.23,138.31,4.98 +500,8,1,1.24,5.48,5.67 +500,8,2,1.25,5.71,5.06 +500,8,4,1.25,7.63,5.03 +500,8,8,1.28,15.58,4.99 +500,8,16,1.23,48.45,5.15 +500,8,32,1.25,179.02,4.92 +500,9,1,1.25,5.56,5.30 +500,9,2,1.25,6.51,5.07 +500,9,4,1.26,8.76,4.94 +500,9,8,1.25,18.86,5.06 +500,9,16,1.25,60.05,5.05 +500,9,32,1.24,225.00,5.07 +500,10,1,1.23,5.53,4.94 +500,10,2,1.23,6.13,4.99 +500,10,4,1.25,9.13,5.00 +500,10,8,1.27,21.88,4.93 +500,10,16,1.25,72.91,5.01 +500,10,32,1.24,276.43,4.95 +1000,1,1,2.49,17.57,5.42 +1000,1,2,2.49,11.41,5.60 +1000,1,4,2.48,8.28,5.42 +1000,1,8,2.48,7.02,5.88 +1000,1,16,2.48,7.12,5.40 +1000,1,32,2.62,10.85,5.71 +1000,2,1,2.54,11.04,5.48 +1000,2,2,2.49,8.01,5.65 +1000,2,4,2.48,6.69,5.39 +1000,2,8,2.56,7.11,5.71 +1000,2,16,2.49,10.78,5.47 +1000,2,32,2.48,27.08,5.70 +1000,3,1,2.48,9.22,5.50 +1000,3,2,2.49,7.04,5.41 +1000,3,4,2.49,6.66,5.51 +1000,3,8,2.51,8.34,5.48 +1000,3,16,2.51,17.51,5.66 +1000,3,32,2.51,54.22,5.48 +1000,4,1,2.50,8.07,5.63 +1000,4,2,2.51,6.73,5.42 +1000,4,4,2.51,7.13,5.53 +1000,4,8,2.49,11.51,5.52 +1000,4,16,2.52,30.24,5.72 +1000,4,32,2.52,92.38,5.75 +1000,5,1,2.55,7.37,5.42 +1000,5,2,2.52,6.84,6.10 +1000,5,4,2.52,7.76,5.88 +1000,5,8,2.51,13.70,5.40 +1000,5,16,2.51,39.26,5.60 
+1000,5,32,2.48,140.90,5.45 +1000,6,1,2.49,6.93,5.43 +1000,6,2,2.49,6.59,5.58 +1000,6,4,2.49,8.45,5.45 +1000,6,8,2.53,17.31,5.49 +1000,6,16,2.52,54.23,5.57 +1000,6,32,2.51,201.19,5.43 +1000,7,1,2.52,6.90,5.48 +1000,7,2,2.51,6.74,5.66 +1000,7,4,2.51,9.92,5.49 +1000,7,8,2.51,21.70,5.67 +1000,7,16,2.51,71.72,5.80 +1000,7,32,2.51,272.19,5.45 +1000,8,1,2.50,6.61,5.49 +1000,8,2,2.53,6.99,5.54 +1000,8,4,2.54,10.56,5.54 +1000,8,8,2.53,26.87,5.46 +1000,8,16,2.52,92.36,5.53 +1000,8,32,2.51,353.73,5.53 +1000,9,1,2.53,6.82,5.61 +1000,9,2,2.50,7.87,5.52 +1000,9,4,2.51,12.98,5.47 +1000,9,8,2.48,32.81,6.09 +1000,9,16,2.51,115.26,5.48 +1000,9,32,2.51,445.90,5.41 +1000,10,1,2.51,6.44,5.38 +1000,10,2,2.48,7.71,5.67 +1000,10,4,2.49,14.83,5.46 +1000,10,8,2.49,39.36,5.42 +1000,10,16,2.51,141.47,5.54 +1000,10,32,2.52,549.02,3.46 +1500,1,1,3.76,22.15,4.00 +1500,1,2,3.74,12.74,3.98 +1500,1,4,3.73,8.01,4.00 +1500,1,8,3.73,6.14,3.96 diff --git a/parallel/empty_file b/parallel/empty_file deleted file mode 100644 index e69de29..0000000