Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
RinGit/R_Basics.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
509 lines (389 sloc)
12.8 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ==========
# = Basics =
# ==========
# Mathematical operations
#   Parentheses:  ()
#   Exponent:     ^ or **
#   Multiply:     *
#   Divide:       /
#   Addition:     +
#   Subtraction:  -
# R follows the PEMDAS order of operations

# Assignment operator: <-
#   variable <- value to be assigned (order matters!)
x <- 1 / 40 # x now holds 0.025
x <- x + 1  # increment x by one
# ===========
# = Vectors =
# ===========
1:5 # will print 1 2 3 4 5

# Can assign a vector to a variable
var <- 1:5

# Arithmetic is vectorized: the operation is applied to every element
2^(1:5) # will print 2 4 8 16 32
2^var   # will also print 2 4 8 16 32

# Can use 2 vectors at the same time (added element by element)
y <- 1:4
z <- 5:8
y + z # will print 6 8 10 12
# =======================
# = Variable Management =
# =======================
# ls() lists the variables currently defined in the environment
ls()

# rm() removes a variable from the environment
# rm("variable_name") works the same as rm(variable_name)
x2 <- x
rm(x)
rm("x2")

# rm() also takes a `list` argument: a character vector of variable names.
# Every variable named in that vector is removed.
#   rm(list = ls()) will remove all current variables
#   rm(list = c("var1", "var2", "var3")) will remove var1, var2, var3
# ======================
# = Package Management =
# ======================
# To find out what packages you have installed, use
#   installed.packages()
installed.packages()

# To install a package, use
#   install.packages("package_name")
# Each package only needs to be installed on your computer once, so only
# the packages that are missing are installed here (instead of reinstalling
# all of them every time the script runs)
for (pkg in c("ggplot2", "dplyr", "gapminder")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# To update a package, use
#   update.packages(oldPkgs = "package_name")
# (the first positional argument of update.packages() is a library
# location, not a package name)

# To load a package, use
#   library("package_name")
# Packages will need to be loaded in each script
library("ggplot2")
library("dplyr")
library("gapminder")
# ================
# = Getting Help =
# ================
# To get more info about a function, use
#   ?function_name
# or
#   help(function_name)
?log
help(exp)

# If you don't know the exact name of the function, use
#   ??function_name_ish
??read.tab # returns information for functions
#   readr::read_table
#   base::base-defunct
#   utils::read.table <= yay!

# You can use the ? operator to get information about
# operators as well (quote the operator)
?"<-" # returns info about the assignment operator

# To read a package's long-form documentation, use
#   vignette("vignette_name")
# Calling vignette() with no arguments lists every available vignette
vignette("dplyr") # opens the vignette named "dplyr"
# ===================
# = Data Structures =
# ===================
# Cats Data Frame Example
# Note: c() is for combine
cats <- data.frame(
  coat = c("calico", "black", "tabby"),
  weight = c(2.1, 5.0, 3.4),
  likes_string = c(1, 0, 1)
)
cats # will show data frame table

# View a prettier table by double-clicking "cats" in the
# environment panel, or use
#   View(data_frame_name)
# View() opens a viewer window, so only call it in an interactive session
if (interactive()) {
  View(cats)
}
# ===============================
# = Reading and Writing to File =
# ===============================
# To output cat data to a .csv file, use
#   write.csv()
# Make sure the output folder exists first; write.csv() will not create it
dir.create("data", showWarnings = FALSE)
write.csv(cats, "data/meow.csv")

# To read this back, use
#   read.csv()
felines <- read.csv("data/meow.csv")

# Note that write.csv() also writes the row names as an extra first column,
# which read.csv() then reads back as a regular variable (named X).
# So cats and felines hold the same information, but cats has 3 variables
# and felines has 4 variables (3 + row numbers).
# Pass row.names = FALSE to write.csv() to leave that column out.

# To print a column of data, use
#   data_frame_name$column_name
cats$weight  # displays the weight column as a vector
felines$coat # displays the coat column as a vector

# Can perform operations on data in the data frame
# Example: Convert weight from kg to lbs
cats$weight <- cats$weight * 2.2
# ==============
# = Data Types =
# ==============
# To see the data type of a variable, use
#   class(variable_name)
class(cats$coat)   # "character" in R >= 4.0; "factor" in older versions,
                   # where data.frame() converted strings to factors
class(cats$weight) # will display "numeric"
class(cats)        # will display "data.frame"

# To see the data and the data type of each column within a data frame,
# use the structure function:
#   str(variable_name)
str(cats)
# ================
# = More Vectors =
# ================
num_vector <- c(1, 3, 5)
chr_vector <- c("a", "b", "c")
chr_vector2 <- c("d", "e", "f")

# c() concatenates: combining two character vectors creates one longer
# character vector
comb_vector <- c(chr_vector, chr_vector2)

# Arithmetic on two numeric vectors works element by element
# (c() would concatenate them, just like the character vectors above)
num_vector2 <- c(100, 10, 20)
num_vector + num_vector2 # will display 101 13 25

# Creating numerical vectors
# Can use : and seq()
my_series <- 1:10
my_series2 <- seq(10)

# seq() can have a different step, or spacing, between elements
new_step <- seq(from = 1, to = 10, by = 0.1)

# Can name each item in a vector
named_vector <- 5:8                    # original vector
naming_vector <- c("a", "b", "c", "d") # vector of names
names(named_vector) <- naming_vector   # combining them
named_vector                           # display!

# When adding number vectors of different lengths, the shorter vector
# repeats itself (is "recycled") until it is as long as the longer vector,
# and then they add.
long_num_vector <- 0:9
short_num_vector <- 0:1
long_num_vector + short_num_vector

# Something different happens when naming: names are NOT recycled.
# Elements past the end of the naming vector get NA as their name
# (printed as <NA>).
larger_than_alphabet <- 1:30
names(larger_than_alphabet) <- LETTERS
larger_than_alphabet
# =============
# = Real Data =
# =============
# Load it from csv
gapminder <- read.csv("data/gapminder_data.csv")

# Alternative way to load the data
# (this replaces the copy that was just read from the csv file)
library(gapminder) # load the package
data("gapminder")  # load data frame called "gapminder"

# Note: attach(gapminder) is deliberately not used. It puts a snapshot of
# the columns on the search path, which can mask other objects and goes
# stale once gapminder is reassigned. Refer to columns as gapminder$column
# (or through the `data` argument of ggplot()) instead.

# Learn about data
str(gapminder)      # types of data in the data frame
nrow(gapminder)     # number of rows of data
ncol(gapminder)     # number of columns of data
dim(gapminder)      # alternative way to see the dimensions
                    # of the frame: nrow, then ncol
colnames(gapminder) # names of columns; variable names
# ================
# = Data Subsets =
# ================
x <- c(5.4, 6.2, 7.1, 4.8, 7.5)
names(x) <- letters[1:5]
x

# Using [] selects part of a variable or data frame
x[1]       # returns first value of x (a 5.4)
x[3]       # returns third value of x (c 7.1)
x[c(1, 3)] # returns first and third value of x
           # (a 5.4 c 7.1)
x[1:4]     # returns all values of x from 1 to 4
           # (a 5.4 b 6.2 c 7.1 d 4.8)
x[6]       # returns NA (with name <NA>); there is no sixth value
x[-2]      # returns all of x except the second element
x[-(2:4)]  # returns all of x except elements 2-4
# head() shows the first 6 rows of a table
# Pass n = <number> to show a different number of rows if necessary
head(gapminder)        # shows first 6 rows of every column
head(gapminder["pop"]) # shows first 6 populations
head(gapminder[, 5])   # shows first 6 rows of 5th col

# Inside square brackets the first element selects rows: [rows, columns]
gapminder[3, ] # show all columns of the third row

# Show the life expectancy on line 138
gapminder[138, "lifeExp"]

# Show first six rows of columns 4 and 6, three equivalent ways
# NOTE(review): assumes columns 4 and 6 are lifeExp and gdpPercap, which
# depends on which copy of gapminder is loaded - confirm
head(gapminder[c(4, 6)])
gapminder[1:6, c(4, 6)]
gapminder[1:6, c("lifeExp", "gdpPercap")]

# Subset into a country
# NOTE(review): assumes rows 13-24 are the Albania rows - confirm
albania <- gapminder[13:24, 1:6]

# Exercise:
# Find a subset for Afghanistan
# Add a new column to that subset for GDP
# Calculate GDP by multiplying population by GDP Per Capita
# Save new subset into a .csv file in your data folder
# NOTE(review): assumes rows 1-12 are the Afghanistan rows - confirm
afghanistan <- gapminder[1:12, ]
afghanistan$gdp <- afghanistan$gdpPercap * afghanistan$pop

# Check out what we've done to confirm
# (View() opens a viewer window, so only call it interactively)
if (interactive()) {
  View(afghanistan)
}

# Make sure the output folder exists; write.csv() will not create it
dir.create("data", showWarnings = FALSE)
write.csv(afghanistan, "data/results.csv")
# =========
# = Plots =
# =========
# Plot Prep - making sure your data is present in your environment
# Install tidyr and knitr only if they are missing, then load everything
for (pkg in c("tidyr", "knitr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(ggplot2)
library(dplyr)
library(tidyr)
library(knitr)

gapminder <- read.csv("data/gapminder_data.csv")
head(gapminder)

# Troubleshooting
getwd() # find current working directory
# If read.csv() above cannot find the file, the working directory is not
# the folder that contains "data/". Fix that from the console, e.g.
#   setwd("path/to/your/project")
# The call is left as a comment on purpose: changing the working directory
# inside the script would break the "data/..." paths used above the next
# time the script is run, and would change where ggsave() writes files.
# Plot command
# First argument - set data to your data frame
# Second argument - aes is for aesthetics
# geom_point() adds your data to the plot in the form of a scatterplot
# geom_line() adds data to the plot in the form of a line graph
# The + that joins layers must END a line; a line that starts with + is
# parsed as a new statement and the layer is not added to the plot
# NOTE(review): `group = country` is the documented grouping aesthetic;
# `by` is kept as written - confirm it groups the lines as intended
ggplot(data = gapminder,
       aes(x = gdpPercap,     # set x axis
           y = lifeExp,       # set y axis
           color = continent, # set color by continent data
           by = country       # set line by country
       )) +
  geom_line() + # display data in a line graph
  geom_point()  # add another layer - scatterplot
# Layering plots
# Layers are drawn one on top of another and share the aesthetics
# set in ggplot()
ggplot(data = gapminder,
       aes(x = gdpPercap, y = lifeExp, color = continent, by = country)) +
  geom_line() +
  geom_point()

# Aesthetics and colors can also be set separately for each layer
ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp, by = country)) +
  geom_line(aes(color = continent)) +
  geom_point(color = "blue")

# Change to logarithmic scale by adding a scale_x_log10() function
# Alpha changes the points' transparency, which helps when points land on
# top of each other
# Alpha scale: 1.0 is solid; 0.0 is completely transparent / invisible
ggplot(data = gapminder,
       aes(x = gdpPercap, y = lifeExp, color = continent, by = country)) +
  geom_point(alpha = 0.5) +
  scale_x_log10()

# Add a regression line using geom_smooth()
# lm = linear model
# The gray band around each regression line shows a confidence interval
ggplot(data = gapminder,
       aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point(alpha = 0.5, aes(shape = continent)) +
  scale_x_log10() +
  geom_smooth(method = "lm")

# Remove a layer's legend by passing show.legend = FALSE to that layer
ggplot(data = gapminder,
       aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point(alpha = 0.5, aes(shape = continent), show.legend = FALSE) +
  scale_x_log10() +
  geom_smooth(method = "lm")
# To turn the legend off completely, do it in the theme, e.g.
#   + theme(legend.position = "none")

# Clean up for publication
# add scale_y_continuous() to set the y-axis range (limits) and the
#   spacing of the tick marks (breaks)
# add theme_bw() to remove background color
# add ggtitle() to add a title
# add xlab() to add a label to the x axis
# add ylab() to add a label to the y axis
ggplot(data = gapminder,
       aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point(aes(shape = continent), size = 2, alpha = 0.25) +
  scale_x_log10() +
  geom_smooth(method = "lm") +
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, by = 10)) +
  theme_bw() +
  ggtitle("Interaction of GDP Per Capita and Life Expectancy") +
  xlab("GDP Per Capita ($)") +
  ylab("Life Expectancy (Years)")
# Export the plot
# ggsave() saves the most recently displayed plot; the file type is taken
# from the file extension. The argument is spelled out as `filename`
# rather than relying on `file` partially matching it.
ggsave(filename = "life_expectancy.png")
ggsave(filename = "life_expectancy.pdf")

# Troubleshooting the images: set the size explicitly
ggsave(filename = "life_exp.png", width = 20, height = 15, units = "cm")

# Boxplot example
# geom_jitter() overlays the individual points with a little random
# sideways spread so they do not sit on top of each other
ggplot(data = gapminder, aes(x = continent, y = lifeExp)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.5, color = "tomato")
# ================
# = Conditionals =
# ================
# If / Else Statements
number <- 37
if (number > 100) {
  print("ERMAGERD bigger than 100!")
} else {
  print("Not so big")
}

# Comparison
#   Greater than:             >
#   Greater than or equal to: >=
#   Equal to:                 ==
#   Less than:                <
#   Less than or equal to:    <=
#   Not equal to:             !=

# If / Else If / Else Chains
# Else is a good catch-all or default statement
number <- 37
if (number > 0) {
  print(1)
} else if (number < 0) {
  print(-1)
} else {
  print(0)
}
# Loops
# Each of the first two loops prints the numbers 1-10
numbers <- 1:10
for (number in numbers) {
  print(number)
}

for (i in 1:10) {
  print(i)
}

# A loop can walk over any vector, e.g. the built-in lowercase letters
for (i in letters) {
  print(i)
}
# Sum the elements of a vector with a loop
# Note: these variables share their names with the built-in functions
# sum() and vector(); the names are kept as in the original script.
# The built-in sum() gives the same total in a single vectorized call.
sum <- 0
vector <- c(4, 8, 15, 16, 23, 42)
for (i in vector) {
  # Add the current element to the running total. The assignment arrow
  # must point at the variable being updated: sum <- sum + i
  sum <- sum + i
}
print(sum) # will print 108
# =====================
# = Writing Functions =
# =====================
# Reading R's Functions
# Type a function's name without parentheses and R will display
# the function's source code
nrow # displays source code for nrow

# Function format
# name_of_function <- function(variables_function_needs) {
#   function_here
#   return(data_to_return)
# }

# Example
# Convert a temperature from degrees Fahrenheit to kelvin.
#   f_temp: numeric temperature(s) in degrees Fahrenheit
#   returns: the same temperature(s) in kelvin
fahr_to_kelvin <- function(f_temp) {
  kelvin <- ((f_temp - 32) * (5 / 9)) + 273.15
  return(kelvin)
}

# Make sure to run your function's definition before you call it
# Otherwise, R won't know what your function does
# If it's not in your Environment list with your variables,
# R doesn't know it exists

# Boiling point
fahr_to_kelvin(212) # returns 373.15, the boiling point in kelvin
# Convert a temperature from degrees Celsius to degrees Fahrenheit.
#   cel: numeric temperature(s) in degrees Celsius
#   returns: the same temperature(s) in degrees Fahrenheit
cel2fahr <- function(cel) {
  fahr <- (cel * 9 / 5) + 32
  return(fahr)
}
cel2fahr(70) # returns 158
# =================
# = Markdown in R =
# =================
# Install rmarkdown and formatR only if they are missing, so the script
# does not reinstall them every time it runs
for (pkg in c("rmarkdown", "formatR")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library(dplyr)
library(gapminder)
library(tidyr)
library(knitr)
library(rmarkdown)
library(formatR)