2019-12-13

github.com/k-hench

Motivation

Writing your own functions in R can help you to write better R code.

By better I mean:

  • clearer scripts that are easier to understand
  • shorter scripts
  • less chances of (copy & paste) errors
  • scripts that are easier to debug


To follow this tutorial you’ll
need to install a few packages:

install.packages("devtools")
devtools::install_github("hadley/tidyverse")
devtools::install_github("r-lib/rlang")
devtools::install_github("k-hench/hypoimg")
devtools::install_github('k-hench/stupid1')
devtools::install_github('k-hench/stupid2')

Creating a function

To create a new function, use function():

Our new function is going to be called add_1(), it is going to have a single parameter x and return the sum of x + 1:

function( x ) { x + 1 }


add_1 <- function(x){x + 1}

After assigning the function, we can call it like any other R function:

add_1
#> function(x){x + 1}
add_1( x = 3 )
#> [1] 4

Defaults

We can also define default values for parameters.

sum_of_two <- function(x, y = 4){ x + y }
sum_of_two(x = 1)
#> [1] 5

sum_of_two(x = 1, y = 1)
#> [1] 2
add_1(y = 1)
#> Error in add_1(y = 1): unused argument (y = 1)

Environments

add_2a <- function(){x + 2}
add_2b <- function(){ x <- 4; x + 2 }
add_2c <- function(x = 4){ x + 2 }
add_2a()
#> Error in add_2a(): object 'x' not found

add_2b()
#> [1] 6
add_2c()
#> [1] 6
x <- 6
add_2a()
#> [1] 8
add_2b()
#> [1] 6
add_2c()
#> [1] 6

Search path

library(rlang)
paster_0 <- function(){paste0(c('X: ','Y: '), c(stupid_x, stupid_y))}
paster_1 <- function(){stupid_y <- "Z";  paste0(c('X: ','Y: '), c(stupid_x, stupid_y))}
library(stupid1)
paster_0()
#> [1] "X: A" "Y: B"

stupid_x
#> [1] "A"
stupid_y
#> [1] "B"

library(stupid2)
paster_0()
#> [1] "X: C" "Y: B"

env_names(search_envs()[['package:stupid1']])
#> [1] "stupid_x"   "stupid_y"   "stupid_fun"
env_names(search_envs()[['package:stupid2']])
#> [1] "stupid_x"
stupid1::stupid_x
#> [1] "A"
stupid2::stupid_x
#> [1] "C"

stupid_x <- 'D'
stupid_y <- '!'
paster_0()
#> [1] "X: D" "Y: !"
paster_1()
#> [1] "X: D" "Y: Z"

The … argument

function( x, … ) { message(x) }


paster_2 <- function(x, ...){ message(x) }
paster_2(x = 'test')
#> test
paster_2(x = 'test', y = 'something', z = 'else')
#> test
paster_3 <- function(x, ...){ 
  message(paste('x: ', x))
  message(paste('other parameters:', ...))
}
paster_3(x = 'test')
#> x:  test
#> other parameters:
paster_3(x = 'test', y = 'something', z = 'else')
#> x:  test
#> other parameters: something else

The %>% operator

One of the core fuctionalities of the tidyverse is the implementation of the pipe (%>%) into the data wrangling process:

data %>% do_something( x, other, paramerters )


All that it does is to pass the object on its left as the first argument into the function on its right.

This operator was introduced by the magrittr package:

Pipelines

function( x ) { x + 1 }


library(magrittr)

The pipe is an elegant way of connecting severeal functions that are executed one after the other.

Using our silly function that simply addes 1 to an input value, there are several ways to get from 1 to 4:


sequential

a <- 1
b <- add_1(a)
c <- add_1(b)
d <- add_1(c)
d
#> [1] 4

nested

add_1(add_1(add_1(1)))
#> [1] 4

pipeline

1 %>%
  add_1() %>%
  add_1() %>%
  add_1()
#> [1] 4

Fishes

library(tidyverse)
library(hypoimg)
#> --- Welcome to hypoimg ---
svg_file <- system.file("extdata",
                        "logo2.c.svg", 
                        package = "hypoimg")

svg <- hypo_read_svg(svg_file)
library(cowplot)
ggdraw(svg)

Recolor

hypo_recolor_svg
#> function (svg, layer = 1, color = "darkgray") 
#> {
#>     svg[[4]][[1]][[4]][[1]][[4]][[layer]]$gp$fill <- color
#>     svg
#> }
#> <bytecode: 0x9729920>
#> <environment: namespace:hypoimg>
svg %>% hypo_recolor_svg(layer = 1, color = 'red') %>% ggdraw()

Map

1:4 %>% 
  purrr::map(add_1)
#> [[1]]
#> [1] 2
#> 
#> [[2]]
#> [1] 3
#> 
#> [[3]]
#> [1] 4
#> 
#> [[4]]
#> [1] 5
1:4 %>% 
  purrr::map(add_1) %>% 
  unlist()
#> [1] 2 3 4 5
add_1 <- function( x ) { x + 1 }

Two parameters

map(.x = 1:3, .f = sum_of_two, 
    y = (1:3)*10)
#> [[1]]
#> [1] 11 21 31
#> 
#> [[2]]
#> [1] 12 22 32
#> 
#> [[3]]
#> [1] 13 23 33
map2(.x = 1:3, .y = (1:3)*10,
     .f = sum_of_two)
#> [[1]]
#> [1] 11
#> 
#> [[2]]
#> [1] 22
#> 
#> [[3]]
#> [1] 33

sum_of_two <- function( x, y ) { x + y }


Painting fish

clr <- c( "#FFFFFF", "#BDB596", "#A16D5A", "#590D0E")

map2(.x = 1:4, .y = clr, .f = hypo_recolor_svg, svg = svg) %>%  plot_grid(plotlist = .)

Variants

Alternative output format

mix_of_two <- function(x, y = 4){ 
  tibble( x = x, y = y,
          s = x + y, 
          p = x * y)
}
mix_of_two(1,2)
#> # A tibble: 1 x 4
#>       x     y     s     p
#>   <dbl> <dbl> <dbl> <dbl>
#> 1     1     2     3     2

map2_dfr(.x = 1:3, .y = (1:3)*10,
         .f =  mix_of_two)
#> # A tibble: 3 x 4
#>       x     y     s     p
#>   <int> <dbl> <dbl> <dbl>
#> 1     1    10    11    10
#> 2     2    20    22    40
#> 3     3    30    33    90

Silent function call:

create_file <- function(input){ 
  name <- str_c("tmp/", input, ".txt")
  write_lines(x = input,
              path = name)
}

dir.create('tmp')
#> Warning in dir.create("tmp"): 'tmp' already exists
letters[1:2] %>%
  map(create_file)
#> [[1]]
#> [1] "a"
#> 
#> [[2]]
#> [1] "b"

letters[3:4] %>%
  walk(create_file)

dir(path = "tmp",pattern = "txt")
#> [1] "a.txt" "b.txt" "c.txt" "d.txt"

Reduce

reduce(1:3,sum_of_two,.init = 6) 
#> [1] 12
# 6 + 1 + 2 + 3
reduce2(.x = 1:4,
        .y = clr,
        .f = hypo_recolor_svg,
        .init = svg) %>% 
  ggdraw()
sum_of_two <- function( x, y ) { x + y }

How does this help?

Repetetive work

options(width = 60)
data_dir <- "data/"
dir(data_dir) %>% 
  head(16)
#>  [1] "experimentX_anglefish_cold_1.tsv"
#>  [2] "experimentX_anglefish_cold_2.tsv"
#>  [3] "experimentX_anglefish_cold_3.tsv"
#>  [4] "experimentX_anglefish_cool_1.tsv"
#>  [5] "experimentX_anglefish_cool_2.tsv"
#>  [6] "experimentX_anglefish_cool_3.tsv"
#>  [7] "experimentX_anglefish_hot_1.tsv" 
#>  [8] "experimentX_anglefish_hot_2.tsv" 
#>  [9] "experimentX_anglefish_hot_3.tsv" 
#> [10] "experimentX_anglefish_warm_1.tsv"
#> [11] "experimentX_anglefish_warm_2.tsv"
#> [12] "experimentX_anglefish_warm_3.tsv"
#> [13] "experimentX_chromis_cold_1.tsv"  
#> [14] "experimentX_chromis_cold_2.tsv"  
#> [15] "experimentX_chromis_cold_3.tsv"  
#> [16] "experimentX_chromis_cool_1.tsv"

experiment_files <- dir(data_dir)

(eg. creating dummy data)

make_fake_data <- function(spec, treatment, rep, n = 50){
  tibble(driver = rnorm(n), 
         response = rnorm(n)) %>% 
    write_tsv(path = str_c("data/experimentX_",spec, "_", treatment, "_", rep,".tsv"))
}

dir.create('data')

tibble(spec = rep(c('anglefish', 'grouper', 'chromis', 'seahorse'), each = 12),
       treatment = rep(rep(c("cold", "cool", "warm", "hot"), 4), each = 3),
       rep = rep(1:3,16)) %>%
  pwalk(make_fake_data)

Smooth import

get_experiment_data <- function(file) {
  tag <- file %>% str_remove("^.*experimentX_") %>% str_remove(".tsv")
  
  read_tsv(file) %>%
    mutate(tag = tag) %>%
    separate(tag, into = c("species", "treatment", "replicate"),convert = TRUE, sep = "_") %>%
    mutate(grp = str_c(species, "_", treatment)) %>%
    select(species:replicate,driver, response, grp)
}

experiment_data <- str_c(data_dir,experiment_files) %>%
  map_dfr(get_experiment_data)
#> # A tibble: 5 x 6
#>   species   treatment replicate driver response grp           
#>   <chr>     <chr>         <int>  <dbl>    <dbl> <chr>         
#> 1 anglefish cold              1  0.249   -0.391 anglefish_cold
#> 2 anglefish cold              1  0.396    0.839 anglefish_cold
#> 3 anglefish cold              1 -2.50    -0.167 anglefish_cold
#> 4 anglefish cold              1  0.184   -1.50  anglefish_cold
#> 5 anglefish cold              1  0.799    0.376 anglefish_cold

Many models

data_nested <- experiment_data %>%
    group_by(grp) %>%
    nest() %>%
    mutate(mod =  map(data, ~ lm(.$response~.$driver)))
#> # A tibble: 10 x 3
#> # Groups:   grp [10]
#>    grp                      data mod   
#>    <chr>          <list<df[,5]>> <list>
#>  1 anglefish_cold      [150 × 5] <lm>  
#>  2 anglefish_cool      [150 × 5] <lm>  
#>  3 anglefish_hot       [150 × 5] <lm>  
#>  4 anglefish_warm      [150 × 5] <lm>  
#>  5 chromis_cold        [150 × 5] <lm>  
#>  6 chromis_cool        [150 × 5] <lm>  
#>  7 chromis_hot         [150 × 5] <lm>  
#>  8 chromis_warm        [150 × 5] <lm>  
#>  9 grouper_cold        [150 × 5] <lm>  
#> 10 grouper_cool        [150 × 5] <lm>

Summarizing Models function

options(width = 100)
summarise_model <- function(data){
  data$mod %>%
    purrr::map_dfr(broom::glance) %>% 
    bind_cols(., data$mod %>% 
                purrr::map_dfr(broom::tidy) %>% 
                mutate(grp = (row_number()+1)%/%2) %>% 
                select(grp,term,estimate) %>%
                pivot_wider(names_from = 'term',values_from = 'estimate') %>% 
                select(-grp) %>% 
                purrr::set_names(nm = c('intercept', 'slope')))
}

lm(x ~ y, data = tibble(x = rnorm(100), y = rnorm(100))) %>%
  list(.) %>% tibble(mod = .) %>%
  summarise_model()
#> # A tibble: 1 x 13
#>   r.squared adj.r.squared sigma statistic p.value    df logLik   AIC   BIC deviance df.residual
#>       <dbl>         <dbl> <dbl>     <dbl>   <dbl> <int>  <dbl> <dbl> <dbl>    <dbl>       <int>
#> 1    0.0104      0.000258  1.19      1.03   0.314     2  -158.  322.  330.     138.          98
#> # … with 2 more variables: intercept <dbl>, slope <dbl>

Summarizing function in action

data_nested <- data_nested %>% 
    bind_cols(., summarise_model(.)) %>%
  mutate(prep = grp) %>%
  separate(prep, into = c("species", "treatment")) %>%
  mutate(treatment = factor(treatment, levels = c("cold", "cool", "warm","hot")))
#> # A tibble: 10 x 18
#> # Groups:   grp [10]
#>    grp        data mod   r.squared adj.r.squared sigma statistic p.value    df logLik   AIC   BIC deviance
#>    <chr> <list<df> <lis>     <dbl>         <dbl> <dbl>     <dbl>   <dbl> <int>  <dbl> <dbl> <dbl>    <dbl>
#>  1 angl… [150 × 5] <lm>   0.0238         0.0172  1.01     3.61    0.0595     2  -213.  431.  440.     150.
#>  2 angl… [150 × 5] <lm>   0.0280         0.0215  1.01     4.27    0.0406     2  -213.  432.  441.     150.
#>  3 angl… [150 × 5] <lm>   0.000215      -0.00654 1.08     0.0318  0.859      2  -224.  453.  462.     173.
#>  4 angl… [150 × 5] <lm>   0.00324       -0.00349 0.990    0.481   0.489      2  -210.  427.  436.     145.
#>  5 chro… [150 × 5] <lm>   0.00251       -0.00423 1.03     0.373   0.542      2  -216.  438.  447.     156.
#>  6 chro… [150 × 5] <lm>   0.00273       -0.00401 0.882    0.405   0.525      2  -193.  392.  401.     115.
#>  7 chro… [150 × 5] <lm>   0.00107       -0.00568 1.09     0.158   0.691      2  -225.  457.  466.     177.
#>  8 chro… [150 × 5] <lm>   0.00848        0.00178 1.01     1.27    0.262      2  -213.  432.  441.     150.
#>  9 grou… [150 × 5] <lm>   0.000210      -0.00655 0.998    0.0310  0.860      2  -212.  429.  438.     147.
#> 10 grou… [150 × 5] <lm>   0.00264       -0.00410 0.956    0.392   0.532      2  -205.  416.  425.     135.
#> # … with 5 more variables: df.residual <int>, intercept <dbl>, slope <dbl>, species <chr>, treatment <fct>

Plotting results

clrs <- RColorBrewer::brewer.pal(7,name = "RdBu")[c(7,6,2,1)]
label_fun <- function(lab){str_c("R^2: ", round(lab,3))}
experiment_data %>%
  ggplot(aes(x = driver, y = response,
             color = treatment))+
  geom_point(alpha = .2)+
  geom_abline(data = data_nested,
              aes(intercept = intercept,
                  color = treatment,
                  slope = slope))+
  geom_text(data = data_nested,
            aes(x = -Inf, y = -Inf, 
                label = label_fun(r.squared)),
            hjust = 0, vjust = 0, 
            parse = TRUE)+
  facet_grid(treatment ~ species,
             as.table = FALSE) +
  scale_colour_manual(values = clrs,
                      guide = FALSE)+
  theme_minimal()