## WORKSHOP CABW-R: Day 1 (Oct 13)

# Welcome!

# thanks for joining today
# remember all our content will be at the website: https://ucd-cws.github.io/CABW2020_R_training/index.html

# Welcome Again!

# A few things to keep in mind:
# - R is always case sensitive.
# - Open shortcuts menu: Shift + Alt + K
# - Run shortcut on PC/Mac: Control + Enter / Command + Enter
# - Pipe (%>%) Shortcut: ctrl or cmd + shift + m

# anything I type here is a comment
# comments are really important

# Day 1 Workshop ----------------------------------------------------------


# Working with basic R ----------------------------------------------------

# the result of a calculation can be stored in a named variable with <-
fish <- 10 + 45

# add additional variables
nitrogen <- 79 - 15

# variables can be reused in later calculations
algae <- fish * nitrogen


# Let's make a variable named riffle and assign a single number to it
riffle <- 100

# let's make a variable named pool and assign several numbers to it
# to group values into one vector, we use "c( )" for "concatenate" or "combine"
pool <- c(10, 20, 30)

# And we can save with Cmd + S or Ctrl + S, or File > Save!


# type type


# Install Packages --------------------------------------------------------

# install.packages("sf")


# Loading Libraries -------------------------------------------------------

library(tidyverse)

# read the two workshop datasets from the project's data/ folder
cscidat <- read_csv(file = "data/cscidat.csv")
ascidat <- read_csv(file = "data/ascidat.csv")

# open the data in a spreadsheet-style viewer
View(cscidat)

# dimensions: number of rows, then number of columns
dim(ascidat)

# column names
names(ascidat)

# structure of data: each column's type and its first few values
str(cscidat)


# Data Structures ---------------------------------------------------------


# A vector made with c() holds values of a single type.

# double vector (numbers that may have decimals)
dbl_var <- c(1, 2.5, 4.5)

# integer vector (the L suffix marks whole numbers)
int_var <- c(1L, 6L, 10L)

# logical
log_var <- c(TRUE, FALSE, TRUE, FALSE)

# character
chr_var <- c("frog", "dog", "cat")


# check for the class of the data or vector
class(dbl_var)

# number of elements in the vector
length(log_var)

# mean() needs numeric or logical data: for a character vector it returns NA
# and raises a warning (this line is an intentional demonstration)
mean(chr_var)

mean(dbl_var)

# Text cannot be used in arithmetic, so this line is an intentional error.
# try() still prints the error message, but lets the rest of the script keep
# running when the whole file is sourced instead of run line by line.
try(mixed_dat <- chr_var + log_var)

# integers and doubles can be combined; the result is a double vector
mixed_dat <- int_var + dbl_var


# mean of some data

# `$` pulls one column out of the data frame as a vector
mean(cscidat$CSCI)

# na.rm = TRUE removes missing values (NA) before averaging;
# without it, a single NA in the column makes the result NA
mean(cscidat$CSCI, na.rm = TRUE)


# assign new vector value
# (store the StationCode column in its own variable)
Sites <- cscidat$StationCode

# typing a variable's name prints its contents
Sites


# Wrangling and Plotting --------------------------------------------------

# Many of the functions below are part of the "dplyr" package.

# Selecting

# keep only a handful of named columns
dplyr_sel1 <- select(cscidat, SampleID_old, New_Lat, New_Long, CSCI)
View(dplyr_sel1)
head(dplyr_sel1) # examines top few rows of dataframe

# drop CSCI and COMID and keep everything else: the "-" sign indicates "not"
dplyr_sel2 <- select(cscidat, -c(CSCI, COMID))
head(dplyr_sel2)

# keep columns whose name contains the letter c
# (matches() ignores case by default, so "C" counts too)
dplyr_sel3 <- select(cscidat, matches("c"))
head(dplyr_sel3)

# Filtering

# keep only the rows with CSCI scores greater than 0.79
dplyr_filt1 <- filter(cscidat, CSCI > 0.79)
View(dplyr_filt1)

# keep only the rows above latitude 37N
dplyr_filt2 <- filter(cscidat, New_Lat > 37)
View(dplyr_filt2)

# use both filters: a row is kept only when both conditions are TRUE
dplyr_filt3 <- filter(cscidat, CSCI > 0.79 & New_Lat > 37)
View(dplyr_filt3)

# Logical operators you can use inside filter():
# & signifies "and"
# | signifies "or"
# == signifies equality (two equals signs)

# Mutating

# get observed taxa: creates a new column named observed
dplyr_mut1 <- mutate(cscidat, observed = OE * E)
View(dplyr_mut1)

# add a column for lo/hi csci scores: "lo" at or below 0.79, "hi" above it
# pattern: ifelse(*criteria*, "this", "that")
dplyr_mut2 <- mutate(
  cscidat,
  CSCIcat = ifelse(CSCI <= 0.79, "lo", "hi")
)

# More functions

# arrange by CSCI scores
# (rows are sorted from lowest to highest; wrap the column in desc() to reverse)
dplyr_arr <- arrange(cscidat, CSCI)
head(dplyr_arr)

# rename lat/lon (note the format of newName = oldName)
# New_Lat becomes lat and New_Long becomes lon; all other columns are unchanged
dplyr_rnm <- rename(cscidat, lat = New_Lat, lon = New_Long)
head(dplyr_rnm)

# Piping

# pipe %>%
# The pipe hands the result on its left to the function on its right as that
# function's first argument. Read it as "and then".

# Use the original dataset and then...
csci_new <- cscidat %>%
  # ...select only CSCI, COMID, and latitude columns, and then...
  select(CSCI, COMID, New_Lat) %>%
  # ...filter for latitudes above 37, and then...
  filter(New_Lat > 37) %>%
  # ...create a new column with categories according to CSCI values.
  mutate(CSCIcat = ifelse(CSCI <= 0.79, "lo", "hi"))

# Note, we do not need to name the dataset in each call: it is given once at
# the very beginning and the pipe passes it along.

# Challenge #4
# Build a table of reference sites only, with an extra column holding the
# ASCI score multiplied by 10.

asci_new <- ascidat %>% # Using the original dataset and then...
  select(ASCI, site_type) %>% # selecting columns of interest...
  filter(site_type == "Reference") %>% # filter for reference sites...
  mutate(ASCI_10 = ASCI * 10) # create new column with ASCI scores * 10
# check the result in the viewer
View(asci_new)

# Today, we'll use the package "ggplot2" to plot our data.

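# Three main components:
# ggplot() - tell R to make a plot and which data to use
# geom_*() - tell R which geometries to use (e.g., geom_point(), geom_boxplot())
# aes() - map columns in your data to visual properties of the plot (x, y, color, ...)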

# empty base plot: ggplot() alone draws a blank canvas with no layers
ggplot(data = ascidat)

# add scatterplot
# with no aes() mapping, geom_point() does not know what to draw and reports
# that the x and y aesthetics are missing (intentional demonstration)
ggplot(data = ascidat) +
  geom_point()

# specify aesthetics: map columns to the x and y axes
ggplot(data = ascidat) +
  geom_point(mapping = aes(x = site_type, y = ASCI))

# create boxplot and store it in asci_box
# initial data input, then axes and geometry, then appropriate labels
asci_box <- ggplot(data = ascidat) +
  geom_boxplot(mapping = aes(x = site_type, y = ASCI)) +
  labs(
    title = "CABW Workshop Figure",
    subtitle = "October 13, 2020",
    caption = "Data Source: CEDEN",
    x = "Site Type",
    y = "ASCI Score"
  )

# typing the object's name draws the stored plot
asci_box

# save our plot

# Write the boxplot to data/asci_boxplot.png at 25 cm x 15 cm.
# When plot = is left out, ggsave() saves the last plot displayed. Naming
# asci_box explicitly means the intended figure is saved even if another plot
# has been drawn in the meantime.
ggsave(
  filename = "asci_boxplot.png",
  plot = asci_box,
  path = "data/", # be sure you specify the path
  width = 25,
  height = 15,
  units = "cm"
)

# END OF LESSON DAY 1.