###########################################
# SECTION 1: REORDERING BY BAR/POINT SIZE #
###########################################

## ggplot2 draws discrete axes in factor-level order, so every example below
## controls the ordering of bars/points by re-levelling a factor.

library(ggplot2)
library(gridExtra)

## Create a data set we can alter
## mtcars2 keeps the original ordering; mtcars3 is the copy that gets
## re-levelled. The row names are moved into an ordinary `car` column.
mtcars3 <- mtcars2 <- data.frame(car = rownames(mtcars), mtcars,
                                 row.names = NULL)
mtcars3$cyl <- mtcars2$cyl <- as.factor(mtcars2$cyl)
head(mtcars2)

## An Example of Unordered Bars/Points
x <- ggplot(mtcars2, aes(y = car, x = mpg)) +
  geom_point(stat = "identity")
y <- ggplot(mtcars2, aes(x = car, y = mpg)) +
  geom_bar(stat = "identity") +
  coord_flip()
grid.arrange(x, y, ncol = 2)

## An Example of Ordered Bars/Points
## Re-level the cars by mpg
mtcars3$car <- factor(mtcars2$car,
                      levels = mtcars2[order(mtcars2$mpg), "car"])
x <- ggplot(mtcars3, aes(y = car, x = mpg)) +
  geom_point(stat = "identity")
y <- ggplot(mtcars3, aes(x = car, y = mpg)) +
  geom_bar(stat = "identity") +
  coord_flip()
grid.arrange(x, y, ncol = 2)

## An Example of Points Ordered by a Summary Statistic
## Re-level the carb by average mpg
## (the outer parentheses print the aggregated table as well as storing it)
(ag_mtcars <- aggregate(mpg ~ carb, mtcars3, mean))
mtcars3$carb <- factor(mtcars2$carb,
                       levels = ag_mtcars[order(ag_mtcars$mpg), "carb"])
ggplot(mtcars3, aes(y = carb, x = mpg)) +
  geom_point(stat = "identity", size = 2, aes(color = carb))

## An Example of Ordered and Faceted Bars/Points
ggplot(mtcars3, aes(y = car, x = mpg)) +
  geom_point(stat = "identity") +
  facet_grid(cyl ~ ., scales = "free", space = "free")

#################################################################
# SECTION 2: SPEEDING UP THE WORKFLOW WITH THE PLOTFLOW PACKAGE #
#################################################################

## Getting the "plotflow package"
browseURL("https://github.com/trinker/plotflow")
# install.packages("devtools")
library(devtools)
## The repository is given in "username/repo" form.
install_github("trinker/plotflow")

## What Does `order_by` do?
library(plotflow)

## Mean mpg, hp and disp for each carb value
dat <- aggregate(cbind(mpg, hp, disp) ~ carb, mtcars, mean)
dat$carb <- factor(dat$carb)

## compare levels (data set looks the same though)
dat$carb
order_by(carb, ~ -hp + -mpg, data = dat)$carb

## Return just the vector with new levels
order_by(carb, ~ -hp + -mpg, dat, df = FALSE)

## Use `order_by` to Order Bars
library(ggplot2)
library(gridExtra)  # grid.arrange() is used further down in this section

## Reset the data from Section 1
dat2 <- data.frame(car = rownames(mtcars), mtcars, row.names = NULL)
ggplot(order_by(car, ~ mpg, dat2), aes(x = car, y = mpg)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  ggtitle("Order Pretty Easy")

## Aggregated by Summary Stat
### Carb Ordered By Summary (Mean) of mpg
## Ordered points with the order_by function
a <- ggplot(order_by(carb, ~ mpg, dat2, mean), aes(x = carb, y = mpg)) +
  geom_point(stat = "identity", aes(colour = carb)) +
  coord_flip() +
  ggtitle("Ordered Dot Plots Made Easy")

## Reverse the ordered points
b <- ggplot(order_by(carb, ~ -mpg, dat2, mean), aes(x = carb, y = mpg)) +
  geom_point(stat = "identity", aes(colour = carb)) +
  coord_flip() +
  ggtitle("Reverse Order Too!")

grid.arrange(a, b, ncol = 1)

## Nested Usage (order_by on an order by dataframe)
# NOTE(review): the heading and title say "nested", but only a single
# order_by() call is present here -- confirm whether an inner order_by() on
# carb was intended as the data argument.
ggplot(order_by(gear, ~ mpg, dat2, mean), aes(mpg, carb)) +
  geom_point(aes(color = factor(cyl))) +
  facet_grid(gear ~ ., scales = "free") +
  ggtitle("I'm Nested (Yay for me!)")

##########################################
# SECTION 3: USING ORDER_BY ON REAL DATA #
##########################################
library(RCurl)
library(XML)
library(rjson)
library(ggplot2)
library(qdap)
library(reshape2)
library(gridExtra)

## Grab the data
URL <- "http://www.payscale.com/top-tech-employers-compared-2012/job-satisfaction-survey-data"
doc <- htmlTreeParse(URL, useInternalNodes = TRUE)
# NOTE(review): the script index (19) and the line window (5:27) are hard
# coded; presumably they pick out the embedded survey data array on the page
# as it was when this was written -- verify against the current page.
nodes <- getNodeSet(doc, "//script[@type='text/javascript']")[[19]][[1]]
dat <- gsub("];", "]", capture.output(nodes)[5:27])

## Parse the captured text as JSON, bind the records into rows and drop the
## second column; then flatten every column with unlist().
ndat <- data.frame(do.call(rbind, fromJSON(paste(dat, collapse = ""))))[, -2]
ndat[, 1:5] <- lapply(ndat, unlist)

## Shorten the employer names: IBM rows are replaced by the bracketed part of
## the name, then every name is cut at the first whitespace or comma.
IBM <- grepl("International Business Machines", ndat[, 1])
ndat[IBM, 1] <- bracketXtract(ndat[IBM, 1])
ndat[, 1] <- vapply(strsplit(ndat[, 1], "\\s|,"), "[", character(1), 1)

## Re-level with order_by
ndat[, "Employer.Name"] <- order_by(Employer.Name, ~ Job.Satisfaction, ndat,
                                    df = FALSE)
colnames(ndat)[1] <- "Employer"

## Melt the data to long format
mdat <- melt(ndat)
## Turn the dotted column names into readable labels, keeping the column
## order of ndat as the level order.
mdat[, 2] <- factor(gsub("\\.", " ", mdat[, 2]),
                    levels = gsub("\\.", " ", colnames(ndat)[-1]))
head(mdat)

ggplot(data = mdat, aes(x = Employer, y = value, fill = factor(Employer))) +
  geom_bar(stat = "identity") +
  coord_flip() +
  ylim(c(0, 1)) +
  facet_wrap(~ variable, ncol = 2) +
  theme(legend.position = "none") +
  ggtitle("Plot 3: Employee Job Satisfaction at Top Tech Companies") +
  ylab("Job Satisfaction")

## A regression model
mod <- lm(Job.Satisfaction ~ Work.Stress + Job.Meaning + Job.Flexibility,
          data = ndat)
mod
anova(mod)
summary(mod)

## Scatter plot with a linear fit (blue band) and a default smoother (red/pink)
theplot <- ggplot(data = ndat, aes(x = Job.Meaning, y = Job.Satisfaction)) +
  geom_smooth(method = "lm", fill = "blue", alpha = .1, size = 1) +
  geom_smooth(color = "red", fill = "pink", alpha = .3, size = 1) +
  xlim(c(.4, .9)) +
  geom_point(aes(size = Job.Flexibility, colour = Work.Stress)) +
  geom_text(aes(label = Employer), size = 3, hjust = -.1, vjust = -.1) +
  scale_colour_gradient(low = "gold", high = "red")

theplot

## Same plot with a circle drawn over the region given by xmin/xmax/ymin/ymax.
## circleGrob() and unit() are namespaced because nothing in this script
## attaches the grid package explicitly.
theplot +
  annotation_custom(grob = grid::circleGrob(r = grid::unit(.4, "npc")),
                    xmin = .47, xmax = .57, ymin = .72, ymax = .82)

## Flag four employers (outs = 0) so the linear fit is drawn per group
ndat$outs <- 1
ndat$outs[ndat$Employer %in% qcv(AOL, Amazon.com, Nvidia, Sony)] <- 0

ggplot(data = ndat, aes(x = Job.Meaning, y = Job.Satisfaction)) +
  geom_smooth(method = "lm", fill = "blue", alpha = .1, size = 1,
              aes(group = outs)) +
  geom_smooth(color = "red", fill = "pink", alpha = .3, size = 1) +
  xlim(c(.4, .9)) +
  geom_point(aes(size = Job.Flexibility, colour = Work.Stress)) +
  geom_text(aes(label = Employer), size = 3, hjust = -.1, vjust = -.1) +
  scale_colour_gradient(low = "gold", high = "red")