## WORKSHOP CABW-R: Day 1 (Oct 13)

# Welcome!
# Thanks for joining today.
# Remember, all our content will be at the website:
# https://ucd-cws.github.io/CABW2020_R_training/index.html

# Welcome Again!

# A few things to keep in mind:
# - R is always case sensitive.
# - Open shortcuts menu: Shift + Alt + K
# - Run shortcut on PC/Mac: Control + Enter / Command + Enter
# - Pipe (%>%) shortcut: Ctrl or Cmd + Shift + M

# Anything typed after a "#" is a comment: R ignores it up to the end of the
# line. Comments are really important!

# Day 1 Workshop ----------------------------------------------------------

# Working with basic R ----------------------------------------------------

# Assign the result of a calculation to a variable with "<-"
fish <- 10 + 45 # 55

# add additional variables
nitrogen <- 79 - 15 # 64
algae <- fish * nitrogen # variables can be reused in later calculations

# Let's make a variable named riffle and assign a number to it
riffle <- 100

# Let's make a variable named pool and assign several numbers to it.
# To build a vector of values, we use "c( )" for "concatenate" or "combine".
pool <- c(10, 20, 30)

# And we can save with Cmd + S or Ctrl + S, or File > Save!
# type type

# Install Packages --------------------------------------------------------

# Packages only need to be installed once, so this line stays commented out.
# install.packages("sf")

# Loading Libraries -------------------------------------------------------

library(tidyverse)

# Read the two workshop datasets. The paths are relative to the current
# working directory, which must contain a "data" folder.
cscidat <- read_csv("data/cscidat.csv")
ascidat <- read_csv("data/ascidat.csv")

View(cscidat) # open the data in a spreadsheet-style viewer
dim(ascidat) # number of rows and columns
names(ascidat) # column names

# structure of data
str(cscidat)

# Data Structures ---------------------------------------------------------

# double vector
dbl_var <- c(1, 2.5, 4.5)

# integer vector (the "L" suffix marks a number as an integer)
int_var <- c(1L, 6L, 10L)

# logical
log_var <- c(TRUE, FALSE, TRUE, FALSE)

# character
chr_var <- c("frog", "dog", "cat")

# check for the class of the data or vector
class(dbl_var)
length(log_var)

# mean() needs numeric (or logical) input: for a character vector it returns
# NA with a warning.
mean(chr_var)
mean(dbl_var)

# Arithmetic on character data is an error ("non-numeric argument to binary
# operator"). try() still shows the error message but lets the rest of the
# script keep running when it is sourced.
try(mixed_dat <- chr_var + log_var)

# Integers and doubles can be combined; the result is a double vector.
mixed_dat <- int_var + dbl_var

# mean of some data
mean(cscidat$CSCI)
mean(cscidat$CSCI, na.rm = TRUE) # na.rm = TRUE drops missing values first

# assign new vector value
Sites <- cscidat$StationCode
Sites

# Wrangling and Plotting --------------------------------------------------

# Many of the functions below are part of the "dplyr" package.
# Selecting

# first, select some chosen columns
dplyr_sel1 <- select(cscidat, SampleID_old, New_Lat, New_Long, CSCI)
View(dplyr_sel1)
head(dplyr_sel1)

# select everything but CSCI and COMID: the "-" sign indicates "not"
dplyr_sel2 <- select(cscidat, -CSCI, -COMID)
head(dplyr_sel2) # examines top few rows of dataframe

# select columns that contain the letter c
# (matches() ignores case by default, so both "c" and "C" count)
dplyr_sel3 <- select(cscidat, matches("c"))
head(dplyr_sel3)

# Filtering

# get CSCI scores greater than 0.79
dplyr_filt1 <- filter(cscidat, CSCI > 0.79)
View(dplyr_filt1)

# get rows located above latitude 37N
dplyr_filt2 <- filter(cscidat, New_Lat > 37)
View(dplyr_filt2)

# use both filters
dplyr_filt3 <- filter(cscidat, CSCI > 0.79 & New_Lat > 37)
View(dplyr_filt3)

# & signifies "and"
# | signifies "or"
# == signifies equality

# Mutating

# get observed taxa
dplyr_mut1 <- mutate(cscidat, observed = OE * E) # creates new column named observed
View(dplyr_mut1)

# add a column for lo/hi csci scores
dplyr_mut2 <- mutate(cscidat, CSCIcat = ifelse(CSCI <= 0.79, "lo", "hi"))
# ifelse(*criteria*, 'this', 'that')

# More functions

# arrange by CSCI scores (ascending order)
dplyr_arr <- arrange(cscidat, CSCI)
head(dplyr_arr)

# rename lat/lon (note the format of newName = oldName)
dplyr_rnm <- rename(cscidat, lat = New_Lat, lon = New_Long)
head(dplyr_rnm)

# Piping

# pipe %>%
csci_new <- cscidat %>% # Use the original dataset and then...
  select(CSCI, COMID, New_Lat) %>% # select only CSCI, COMID, and latitude columns...
  filter(New_Lat > 37) %>% # filter for Latitudes above 37 ...
  mutate(CSCIcat = ifelse(CSCI <= 0.79, "lo", "hi")) # create new column with categories according to CSCI values.

# Note, we do not need to specify the full dataset in each call since it is
# identified at the very beginning.

# Challenge #4

asci_new <- ascidat %>% # Using the original dataset and then...
  select(ASCI, site_type) %>% # selecting columns of interest...
  filter(site_type == "Reference") %>% # filter for reference sites...
  mutate(ASCI_10 = ASCI * 10) # create new column with ASCI scores * 10

View(asci_new)

# Today, we'll use the package "ggplot2" to plot our data.

# Three main components:
# ggplot() - tell R to make a plot
# geom() - tell R which geometries to use
# aes() - allow you to customize your plot

# empty base plot
ggplot(data = ascidat)

# add scatterplot
# NOTE: no x/y aesthetics are mapped yet, so this plot cannot be drawn and
# reports missing aesthetics when displayed; the next step fixes that.
ggplot(data = ascidat) +
  geom_point()

# specify aesthetics
ggplot(data = ascidat) +
  geom_point(mapping = aes(x = site_type, y = ASCI))

# create boxplot
asci_box <- ggplot(data = ascidat) + # initial data input
  geom_boxplot(mapping = aes(x = site_type, y = ASCI)) + # specify axes and geometry
  labs(
    x = "Site Type",
    y = "ASCI Score",
    title = "CABW Workshop Figure",
    subtitle = "October 13, 2020",
    caption = "Data Source: CEDEN"
  ) # add appropriate labels

asci_box

# save our plot
ggsave(
  "asci_boxplot.png",
  plot = asci_box, # name the plot explicitly instead of relying on the default
  path = "data/", # be sure you specify the path
  width = 25,
  height = 15,
  units = "cm"
)

# If `plot` is not given, ggsave() saves the last plot displayed. Passing
# `plot = asci_box` guarantees the boxplot is the figure written to disk.

# END OF LESSON DAY 1.