# (GitHub web-page navigation text that was captured with this file has been
# removed here; it was not R code and would not parse.)
# ==========
# = Basics =
# ==========
# Mathematical operations
# Parentheses: ()
# Exponent: ^ or **
# Multiply: *
# Divide: /
# Addition: +
# Subtraction: -
# R follows the PEMDAS order of operations
# Assignment operator: <-
# variable <- value to be assigned (order matters!)
x <- 1/40 # Assign the value 0.025 to x
x <- x + 1 # Increment x by 1 (x is now 1.025)
# ===========
# = Vectors =
# ===========
# The colon operator builds a vector of consecutive integers
1:5 # will print 1 2 3 4 5
# Can assign vector to variable
var <- 1:5
# Arithmetic is applied to every element of a vector at once
2^(1:5) # will print 2 4 8 16 32
2^var # will also print 2 4 8 16 32
# Can use 2 vectors at the same time; they are combined element by element
y <- 1:4
z <- 5:8
y + z # will print 6 8 10 12
# =======================
# = Variable Management =
# =======================
# ls() lists the variables currently defined in the environment
ls()
# rm() removes a variable from the environment
# rm("variable_name") works the same as rm(variable_name)
x2 <- x # second variable, so both forms of rm() can be shown
rm(x) # remove by bare name
rm("x2") # remove by quoted name
# rm() also has a `list` argument: a character vector of variable names
# Every variable named in that vector is removed
# rm(list = ls()) will remove all current variables
# rm(list = c("var1", "var2", "var3")) will remove var1, var2, var3
# ======================
# = Package Management =
# ======================
# To find out what packages you have installed, use
# installed.packages()
installed.packages()
# To install a package, use
# install.packages("package_name")
# Each package will only need to be installed on your computer once
install.packages("ggplot2")
install.packages("dplyr")
install.packages("gapminder")
# To update all installed packages, use
# update.packages()
# To update only specific packages, use
# update.packages(oldPkgs = "package_name")
# (the first argument of update.packages() is a library location,
# not a package name)
# To load a package, use
# library("package_name")
# Packages will need to be loaded in each script (every new R session)
library("ggplot2")
library("dplyr")
library("gapminder")
# ================
# = Getting Help =
# ================
# To get more info about a function, use
# ?function_name
# or
# help(function_name)
?log
help(exp)
# If you don't know the exact name of the function, use
# ??function_name_ish
??read.tab # searches the help pages; example results:
# readr::read_table
# base::base-defunct
# utils::read.table <= yay!
# You can use ? operator to get information about
# operators as well
?"<-" # returns info about the assignment operator
# To read the long-form documentation shipped with a package, use
# vignette("vignette_name")
# The first argument is the name of a vignette, not of a package
# vignette() with no arguments lists every available vignette (a lot!)
# vignette(package = "package_name") lists the vignettes of one package
vignette("dplyr") # opens the vignette named "dplyr"
# ===================
# = Data Structures =
# ===================
# Cats Data Frame Example
# c() ("combine") builds each column as a vector; data.frame() binds the
# columns together, one argument per column
cats <- data.frame(
  coat = c("calico", "black", "tabby"),
  weight = c(2.1, 5.0, 3.4),
  likes_string = c(1, 0, 1)
)
cats # typing the name prints the data frame as a table
# View prettier table by double-clicking "cats" in
# environment panel or use
# View(data_frame_name)
View(cats) # opens the data frame in a spreadsheet-style viewer
# ===============================
# = Reading and Writing to File =
# ===============================
# To output cat data to a .csv file, use
# write.csv()
write.csv(cats, "data/meow.csv") # path is relative to the working directory
# To read this back, use
# read.csv()
felines <- read.csv("data/meow.csv")
# Note that write.csv() saves the row names as an extra first column by
# default, and read.csv() then reads that column back in as a variable.
# Based on the above, our cats data and our felines data
# have the same information, but cats has 3 variables
# and felines has 4 variables (3 + row numbers)
# To print a column of data, use
# data_frame_name$column_name
cats$weight # displays weight column of data as a vector
felines$coat # displays coat column of data as a vector
# Can perform operations on data in the data frame
# Example: Convert weight from kg to lbs
cats$weight <- cats$weight * 2.2 # overwrites the weight column in place
# ==============
# = Data Types =
# ==============
# To see the data type of a variable, use
# class(variable_name)
class(cats$coat) # will display "character" in R >= 4.0
# ("factor" in older versions of R, where data.frame() converted strings
# to factors by default)
class(cats$weight) # will display "numeric"
class(cats) # will display "data.frame"
# To see the data and the data type of every column in a data frame, use
# the structure function:
# str(variable_name)
str(cats)
# ================
# = More Vectors =
# ================
num_vector <- c(1, 3, 5)
chr_vector <- c("a", "b", "c")
chr_vector2 <- c("d", "e", "f")
# combining vectors of characters with c() creates a longer
# character vector
comb_vector <- c(chr_vector, chr_vector2)
# adding two numeric vectors with + adds them element by element
# (it does not join them into a longer vector)
num_vector2 <- c(100, 10, 20)
num_vector + num_vector2 # will display 101 13 25
# creating numerical vectors
# can use : and seq()
my_series <- 1:10
my_series2 <- seq(10)
# seq() can have a different step, or spacing, between
# elements
new_step <- seq(from = 1, to = 10, by = 0.1)
# can name each item in a vector
named_vector <- 5:8 # original vector
naming_vector <- c("a", "b", "c", "d") # vector of names
names(named_vector) <- naming_vector # combining them
named_vector # display!
# When adding number vectors of different lengths,
# the shorter vector repeats itself until it is as long
# as the longer vector, and then they add.
long_num_vector <- 0:9
short_num_vector <- 0:1
long_num_vector + short_num_vector # will display 0 2 2 4 4 6 6 8 8 10
# something different happens when naming: names are not repeated
# LETTERS has only 26 entries, so after the end of the naming vector
# (elements 27-30) the names are missing and display as <NA>
larger_than_alphabet <- 1:30
names(larger_than_alphabet) <- LETTERS
larger_than_alphabet
# =============
# = Real Data =
# =============
# load it from csv
gapminder <- read.csv("data/gapminder_data.csv")
# alternative way to load the data
library(gapminder) # load the package
data("gapminder") # load data frame called "gapminder"
# (this replaces the copy that was read from the csv above)
# NOTE: attach(gapminder) was removed. attach() does not copy anything into
# the environment; it puts the columns on the search path, where they can be
# silently masked by (or mask) other variables. Refer to columns explicitly
# instead, e.g. gapminder$pop.
# learn about data
str(gapminder) # types of data in the data frame
nrow(gapminder) # number of rows of data
ncol(gapminder) # number of columns of data
dim(gapminder) # alternative way to see dimensions
# of frame: nrow <tab> ncol
colnames(gapminder) # names of columns; variable names
# ================
# = Data Subsets =
# ================
x <- c(5.4, 6.2, 7.1, 4.8, 7.5)
names(x) <- letters[1:5] # name the five elements a through e
x
# Using [] selects part of a variable or data frame
x[1] # returns first value of x (a 5.4)
x[3] # returns third value of x (c 7.1)
x[c(1, 3)] # returns first and third value of x
# (a 5.4 c 7.1)
x[1:4] # returns all values of x from 1 to 4
# (a 5.4 b 6.2 c 7.1 d 4.8)
x[6] # returns NA (named <NA>); there is no sixth value
x[-2] # returns all of x except second element
x[-(2:4)] # returns all of x except elements 2 through 4
# head shows the first 6 rows of the table and variables
# can change to a different number of rows if necessary
head(gapminder) # shows first 6 rows of every column
head(gapminder["pop"]) # shows first 6 populations
head(gapminder[,5]) # shows first 6 rows of 5th col
# Inside the square brackets the order is [rows, columns]
gapminder[3,] # show all columns of the third row
# Show the life expectancy in row 138
gapminder[138,"lifeExp"]
# Show first six rows of the double type columns
# NOTE(review): the three lines below are presented as equivalent, which
# assumes columns 4 and 6 are lifeExp and gdpPercap - confirm against the
# column order of the data that was actually loaded
head(gapminder[c(4, 6)])
gapminder[1:6, c(4, 6)]
gapminder[1:6, c("lifeExp", "gdpPercap")]
# Subset into a country
albania <- gapminder[13:24, 1:6] # assumes rows 13-24 are Albania - verify
# Exercise:
# Find a subset for Afghanistan
# Add a new column to that subset for GDP
# Calculate GDP by multiplying population by GDP Per Capita
# Save new subset into a .csv file in your data folder
afghanistan <- gapminder[1:12, ] # assumes rows 1-12 are Afghanistan - verify
afghanistan$gdp <- afghanistan$gdpPercap * afghanistan$pop
View(afghanistan) # Check out what we've done to confirm
write.csv(afghanistan,"data/results.csv")
# =========
# = Plots =
# =========
# Plot Prep - making sure your data is present in your environment
library(ggplot2)
library(dplyr)
install.packages("tidyr")
library(tidyr)
install.packages("knitr")
library(knitr)
gapminder <- read.csv("data/gapminder_data.csv")
head(gapminder)
# Troubleshooting
# If read.csv() cannot find the file, check where R is looking:
getwd() # find current working directory
# If needed, change the working directory from the console, e.g.
# setwd("./data/") # change working directory to where your data is
# The call is left commented out on purpose: when it runs as part of the
# script, the session moves into data/, so the relative path
# "data/gapminder_data.csv" above no longer resolves the next time the
# script is run, and files saved later land in data/ instead of the
# project folder.
# Note that "./data/" is a relative path, so your setwd() path
# may be different based on your current working directory
# Plot command
# data: the data frame to plot
# aes(): the aesthetics, i.e. which columns map to which visual properties
# geom_point() adds your data to the plot in the form of scatterplot
# geom_line() adds data to the plot in the form of a line graph
# Layers are joined with `+`, and the `+` must END a line: a line that
# starts with `+` is parsed as a separate statement, so that layer is never
# added to the plot.
# NOTE(review): `by` is not a documented ggplot2 aesthetic; `group = country`
# is the documented way to draw one line per country - confirm before changing.
ggplot(
  data = gapminder,
  aes(
    x = gdpPercap, # set x axis
    y = lifeExp, # set y axis
    color = continent, # set color by continent data
    by = country # set line by country
  )
) +
  geom_line() + # display data in a line graph
  geom_point() # add another layer - scatterplot
# Layering plots
# Each geom is drawn on top of the previous one, using the same aesthetics
ggplot(
  data = gapminder,
  aes(x = gdpPercap, y = lifeExp, color = continent, by = country)
) +
  geom_line() +
  geom_point()
# A layer can have its own aesthetics or fixed settings:
# here the lines are colored by continent while every point is blue
ggplot(
  data = gapminder,
  aes(x = gdpPercap, y = lifeExp, by = country)
) +
  geom_line(aes(color = continent)) +
  geom_point(color = "blue")
# Adding scale_x_log10() puts the x axis on a logarithmic scale
# alpha sets the points' transparency, which helps when points overlap
# alpha = 1.0 is solid; alpha = 0.0 is completely transparent / invisible
ggplot(
  data = gapminder,
  aes(x = gdpPercap, y = lifeExp, color = continent, by = country)
) +
  geom_point(alpha = 0.5) +
  scale_x_log10()
# geom_smooth() adds a fitted line; method = "lm" fits a linear model
# The gray band around each line shows a confidence interval
ggplot(
  data = gapminder,
  aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point(alpha = 0.5, aes(shape = continent)) +
  scale_x_log10() +
  geom_smooth(method = "lm")
# show.legend = FALSE on a layer keeps that layer out of the legend
# (here: the point layer)
ggplot(
  data = gapminder,
  aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point(alpha = 0.5, aes(shape = continent), show.legend = FALSE) +
  scale_x_log10() +
  geom_smooth(method = "lm")
# Clean up for publication
# (a legend can also be turned off completely through theme(); not shown here)
# scale_y_continuous() sets the y-axis range and the spacing of its breaks
# theme_bw() removes the background color
# ggtitle() adds a title
# xlab() / ylab() label the x and y axes
ggplot(
  data = gapminder,
  aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point(aes(shape = continent), size = 2, alpha = 0.25) +
  scale_x_log10() +
  geom_smooth(method = "lm") +
  scale_y_continuous(limits = c(0,100), breaks = seq(0, 100, by = 10)) +
  theme_bw() +
  ggtitle("Interaction of GDP Per Capita and Life Expectancy") +
  xlab("GDP Per Capita ($)") +
  ylab("Life Expectancy (Years)")
# Export the plot
# ggsave() saves the most recently displayed plot by default; the file
# extension chooses the output format.
# The argument is spelled out as `filename`; `file =` only worked through
# partial argument matching.
ggsave(filename = "life_expectancy.png")
ggsave(filename = "life_expectancy.pdf")
# Troubleshooting the images
# Set the size explicitly if the saved image has the wrong dimensions
ggsave(filename = "life_exp.png", width = 20, height = 15, units = "cm")
# Boxplot example
# One box per continent, with the individual observations jittered on top
ggplot(
  data = gapminder,
  aes(x = continent, y = lifeExp)
) +
  geom_boxplot() +
  geom_jitter(alpha = 0.5, color = "tomato")
# ================
# = Conditionals =
# ================
# If / Else Statements
# The first branch runs when the condition is TRUE, the else branch otherwise
number <- 37
if (number > 100) {
  print("ERMAGERD bigger than 100!")
} else {
  print("Not so big")
}
# Comparison operators
#   >   greater than
#   >=  greater than or equal to
#   ==  equal to
#   <   less than
#   <=  less than or equal to
#   !=  not equal to
# If / Else If / Else Chains
# Conditions are tested top to bottom; the final else is a good catch-all
# or default statement
number <- 37
if (number > 0) {
  print(1)
} else if (number < 0) {
  print(-1)
} else {
  print(0)
}
# Loops
# The first two loops each print the numbers 1-10:
# one walks over a stored vector, the other over a sequence built in place
numbers <- seq_len(10)
for (number in numbers) {
  print(number)
}
for (i in seq_len(10)) {
  print(i)
}
# A loop can walk over any vector, e.g. the built-in lowercase letters
for (i in letters) {
  print(i)
}
# sum
# Add up the elements of a vector one at a time.
# The running total must be updated with `<-`. The original `sum -> sum + i`
# is a rightward assignment whose target is the expression `sum + i`,
# which is an error.
# NOTE: `sum` and `vector` share their names with base R functions; calls
# such as sum(x) still work, but different names would be less confusing.
sum <- 0
vector <- c(4, 8, 15, 16, 23, 42)
for (i in vector) {
  sum <- sum + i
}
print(sum) # prints 108
# =====================
# = Writing Functions =
# =====================
# Reading R's Functions
# Type a function's name without parentheses and R will display
# the function's source code instead of calling it
nrow # displays source code for nrow
# Function format
# name_of_function <- function(variables_function_needs) {
#   function_here
#   return(data_to_return)
# }
# (the value of the last expression is returned, so return() is optional)
# Example: convert a temperature from degrees Fahrenheit to kelvin
#   f_temp: numeric temperature in degrees Fahrenheit
#   returns: the same temperature in kelvin
fahr_to_kelvin <- function(f_temp) {
  celsius <- (f_temp - 32) * (5 / 9) # Fahrenheit -> Celsius
  celsius + 273.15 # Celsius -> kelvin
}
# Run the definition above before calling the function
# Until then R does not know the function exists: it must appear in your
# Environment list alongside your variables
# Boiling point
fahr_to_kelvin(212) # returns 373.15, the boiling point in kelvin
# Convert a temperature from degrees Celsius to degrees Fahrenheit
#   cel: numeric temperature in degrees Celsius
#   returns: the same temperature in degrees Fahrenheit
cel2fahr <- function(cel) {
  scaled <- cel * 9 / 5 # stretch Celsius degrees to Fahrenheit degrees
  scaled + 32 # shift to the Fahrenheit zero point
}
cel2fahr(70) # returns 158
# =================
# = Markdown in R =
# =================
# Install the two packages that are new in this section
# (a character vector installs several packages in one call)
install.packages(c("rmarkdown", "formatR"))
# Load the packages used in this section
library(dplyr)
library(gapminder)
library(tidyr)
library(knitr)
library(rmarkdown)
library(formatR)