hacktracking: # The basics of programming in R

# The basics of programming in R

Basic Building Blocks

> x <- 10
> z <- c(1.1, 9, 3.14)
> z * 2 + 100
> my_sqrt <- sqrt(z - 1)
> my_div <- z / my_sqrt
> c(1, 2, 3, 4) + c(0, 10)

Workspace and Files

> getwd()
> ls()
> dir()
> list.files()
> dir.create(path = 'testdir')
> setwd('testdir')
> file.create('mytest.R')
> file.info('mytest.R')
> file.info('mytest.R')$size
> file.rename('mytest.R', 'mytest2.R')
> file.copy('mytest2.R', 'mytest3.R')
> file.path('folder1', 'folder2')
> dir.create(file.path('testdir2', 'testdir3'), recursive = TRUE)
> unlink('testdir2', recursive = TRUE)

Sequences of Numbers

> 1:20
> pi:10
> 15:1
> seq(0, 10, by = 0.5)
> my_seq <- seq(5, 10, length = 30)
> length(my_seq)
> rep(0, times = 40)
> rep(c(0, 1, 2), times = 10)
> rep(c(0, 1, 2), each = 10)

Vectors

> num_vect <- c(0.5, 55, -10, 6)
> tf <- num_vect < 1
> num_vect >= 6
> my_char <- c('My', 'name', 'is')
> paste(my_char, collapse = ' ')
> my_name <- c(my_char, 'R')
> my_name
> paste('Hello', 'world!', sep = ' ')
> paste(1:3, c('X', 'Y', 'Z'), sep = '')
> paste(LETTERS, 1:4, sep = '-')

Missing Values

> x <- c(44, NA, 5, NA)
> x * 3
> rnorm(1000)
> y <- rnorm(1000)
> z <- rep(NA, 1000)
> my_data <- sample(c(y, z), 100)
> my_na <- is.na(my_data)
> my_data == NA
> sum(my_na)
> 0 / 0
> Inf - Inf

Subsetting Vectors

> x[1:10]
> x[is.na(x)]
> y <- x[!is.na(x)]
> y[y > 0]
> x[x > 0]
> x[!is.na(x) & x > 0]
> x[c(3, 5, 7)]
> x[0]
> x[3000]
> x[c(-2, -10)]
> x[-c(2, 10)]
> vect <- c(foo = 11, bar = 2, norf = NA)
> names(vect)
> vect2 <- c(11, 2, NA)
> names(vect2) <- c('foo', 'bar', 'norf')
> identical(vect, vect2)
> vect['bar']
> vect[c('foo', 'bar')]

Matrices and Data Frames

> my_vector <- 1:20
> dim(my_vector)
> length(my_vector)
> dim(my_vector) <- c(4, 5)
> dim(my_vector)
> attributes(my_vector)
> class(my_vector)
> my_matrix <- my_vector
> my_matrix2 <- matrix(data = 1:20, nrow = 4, ncol = 5)
> identical(my_matrix, my_matrix2)
> patients <- c('Bill', 'Gina', 'Kelly', 'Sean')
> cbind(patients, my_matrix)
> my_data <- data.frame(patients, my_matrix)
> class(my_data)
> cnames <- c('patient', 'age', 'weight', 'bp', 'rating', 'test')
> colnames(my_data) <- cnames

Logic

> TRUE == TRUE
> (FALSE == TRUE) == FALSE
> 6 == 7
> 6 < 7
> 10 <= 10
> 5 != 7
> !(5 == 7)
> FALSE & FALSE
> TRUE & c(TRUE, FALSE, FALSE)
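> TRUE && c(TRUE, FALSE, FALSE)  # error since R 4.3.0: `&&` requires length-one operands (older versions used only the first element)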
> TRUE | c(TRUE, FALSE, FALSE)
> TRUE || c(TRUE, FALSE, FALSE)
> 5 > 8 || 6 != 8 && 4 > 3.9
> isTRUE(6 > 4)
> identical('twins', 'twins')
> xor(5 == 6, !FALSE)
> ints <- sample(10)
> ints > 5
> which(ints > 7)
> any(ints < 0)
> all(ints > 0)

Functions

> Sys.Date()
> mean(c(2, 4, 5))
> boring_function
function(x) {
  # Identity function: the argument is returned exactly as received.
  x
}
> boring_function('My first function!')
# Arithmetic mean computed by hand: the sum of the elements divided by
# how many there are.
#
# my_vector: a numeric vector.
# Returns a single number; an empty vector gives 0 / 0, i.e. NaN.
my_mean <- function(my_vector) {
  total <- sum(my_vector)
  count <- length(my_vector)
  total / count
}
> my_mean(c(4, 5, 10))
# Remainder left over when `num` is divided by `divisor`.
#
# num:     the dividend.
# divisor: the divisor; defaults to 2.
# Returns num %% divisor.
remainder <- function(num, divisor = 2) {
  num %% divisor
}
> remainder(5)
> remainder(11, 5)
> remainder(divisor = 11, num = 5)
> remainder(4, div = 2)
> args(remainder)
# Apply a function to a piece of data.
#
# func: a function taking one argument (named or anonymous).
# dat:  the value handed to `func`.
# Returns whatever func(dat) returns.
evaluate <- function(func, dat) {
  func(dat)
}
> evaluate(median, c(1.4, 3.6, 7.9, 8.8))
> evaluate(sd, c(1.4, 3.6, 7.9, 8.8))
> evaluate(function(x){x+1}, 6)
> evaluate(function(x){x[1]}, c(8, 4, 0))
> evaluate(function(x){x[length(x)]}, c(8, 4, 0))
> paste('Programming', 'is', 'fun!')
# Wrap any number of words in a telegram-style message.
#
# ...: values forwarded to paste() between the 'START' and 'STOP' markers.
# Returns the pieces joined with single spaces.
telegram <- function(...) {
  paste('START', ..., 'STOP', sep = ' ')
}
> telegram('1', '2', '3')
# Fill in a mad-libs sentence from named arguments.
#
# ...: named arguments; `place`, `adjective` and `noun` are the ones read.
# Returns the completed sentence as a single string.
mad_libs <- function(...) {
  args <- list(...)
  # `[[` extracts the element itself; `[` would return a one-element list.
  place <- args[["place"]]
  adjective <- args[["adjective"]]
  noun <- args[["noun"]]
  paste(
    "News from", place, "today where", adjective,
    "students took to the streets in protest of the new",
    noun, "being installed on campus."
  )
}
> mad_libs(place = 'Russia', adjective = 'great', noun = 'square')
# Infix operator that glues its left and right operands together with a
# single space between them.
`%p%` <- function(a, b) paste(a, b, sep = " ")
> 'I' %p% 'love' %p% 'R!'

lapply and sapply

> head(flags)
> dim(flags)
> class(flags)
> cls_list <- lapply(flags, class)
> class(cls_list)
> cls_vect <- sapply(flags, class)
> class(cls_vect)
> sum(flags$orange)
> flag_colors <- flags[, 11:17]
> head(flag_colors)
> lapply(flag_colors, sum)
> sapply(flag_colors, sum)
> sapply(flag_colors, mean)
> flag_shapes <- flags[, 19:23]
> lapply(flags, range)
> lapply(flag_shapes, range)
> shape_mat <- sapply(flag_shapes, range)
> unique(c(3, 4, 5, 5, 5, 6, 6))
> unique_vals <- lapply(flags, unique)
> sapply(unique_vals, length)
> lapply(unique_vals, function(elem) elem[2])

vapply and tapply

> sapply(flags, unique)
> vapply(flags, unique, numeric(1))
> sapply(flags, class)
> vapply(flags, class, character(1))
> table(flags$landmass)
> table(flags$animate)
> tapply(flags$animate, flags$landmass, mean)
> tapply(flags$population, flags$red, summary)
> tapply(flags$population, flags$landmass, summary)

Looking at Data

> ls()
> class(plants)
> dim(plants)
> nrow(plants)
> ncol(plants)
> object.size(plants)
> names(plants)
> head(plants)
> head(plants, 10)
> tail(plants, 15)
> summary(plants)
> table(plants$Active_Growth_Period)
> str(plants)

Simulation

> sample(1:6, 4, replace = TRUE)
> sample(1:20, 10)
> LETTERS
> sample(LETTERS)
> flips <- sample(c(0, 1), 100, replace = TRUE, prob = c(0.3, 0.7))
> sum(flips)
> rbinom(1, size = 100, prob = 0.7)
> flips2 <- rbinom(100, size = 1, prob = 0.7)
> sum(flips2)
> rnorm(10)
> rnorm(mean = 100, sd = 25)  # error: argument "n" is missing, with no default
> rnorm(10, mean = 100, sd = 25)
> rpois(5, 10)
> my_pois <- replicate(100, rpois(5, 10))
> cm <- colMeans(my_pois)
> hist(cm)

Dates and Times

> d1 <- Sys.Date()
> class(d1)
[1] "Date"
> unclass(d1)
[1] 16904
> d1
[1] "2016-04-13"
> d2 <- as.Date("1969-01-01")
> d2
[1] "1969-01-01"
> unclass(d2)
[1] -365
> t1 <- Sys.time()
> t1
[1] "2016-04-13 11:14:20 CEST"
> class(t1)
[1] "POSIXct" "POSIXt"
> unclass(t1)
[1] 1460538861
> t2 <- as.POSIXlt(Sys.time())
> t2
[1] "2016-04-13 11:15:53 CEST"
> class(t2)
[1] "POSIXlt" "POSIXt"
> unclass(t2)
$sec
[1] 53.85903

$min
[1] 15

$hour
[1] 11

$mday
[1] 13

$mon
[1] 3

$year
[1] 116

$wday
[1] 3

$yday
[1] 103

$isdst
[1] 1

$zone
[1] "CEST"

$gmtoff
[1] 7200

attr(,"tzone")
[1] ""     "CET"  "CEST"
> str(unclass(t2))
> t2$min
> weekdays(d1)
> months(t1)
> quarters(t2)
> t3 <- 'October 17, 1986 08:24'
> t4 <- strptime(t3, '%B %d, %Y %H:%M')
> class(t4)
[1] "POSIXlt" "POSIXt"
> Sys.time() > t1
> Sys.time() - t1
> difftime(Sys.time(), t1, units = 'days')

Base Graphics

> data(cars)
> head(cars)
> plot(cars)
> plot(x = cars$speed, y = cars$dist)
> plot(x = cars$dist, y = cars$speed)
> plot(x = cars$speed, y = cars$dist, xlab = 'Speed')
> plot(x = cars$speed, y = cars$dist, ylab = 'Stopping Distance')
> plot(x = cars$speed, y = cars$dist, xlab = 'Speed', ylab = 'Stopping Distance')
> plot(cars, main = 'My Plot')
> plot(cars, sub = 'My Plot Subtitle')
> plot(cars, col = 2)
> plot(cars, xlim = c(10, 15))
> plot(cars, pch = 2)
> data(mtcars)
> boxplot(mpg ~ cyl, data = mtcars)
> hist(mtcars$mpg)

Central Tendency

> cars
> myMPG <- cars$mpgCity
> mean(myMPG)
> median(myMPG)
> table(myMPG)

Dispersion

> range(cars$price)
> var(cars$price)
> sd(cars$price)

# The basics of programming in R

No comments: