
Summary
This post is a distillation of the tidymodels tutorials on machine learning. The formatting of the originals was condensed into scripts with comments. The original tutorials are here; this post is essentially my notes from those tutorials.
Overview
It’s been a while since I’d done any modeling, so, quite by accident, I discovered that RStudio was promoting a new package called tidymodels. Before stumbling onto the new package, the caret package, by Max Kuhn, was my default for any modeling that I did. So it was a great comfort to learn that he had released an updated, vegetable-themed package, parsnip.
There’s a set of five tutorials on the tidymodels site to get you started. They’re extremely helpful and should be viewed before reading further in this post. To absorb the lessons, I distilled the narrative and code blocks into scripts with comments only. Using the new outline feature in RStudio 1.4 (I hadn’t updated RStudio in a while), the commenting paradigm gives structure to each lesson.
They’ve been included as gist embeds so the text can be copied and pasted into your own script. Make sure to hit the outline button in RStudio so you can see the outline headings.

Figure 1: Show Document Outline button in RStudio
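For reference, RStudio builds that outline from comment lines ending in four or more dashes, and (as of RStudio 1.4) the number of leading # characters sets the nesting level. A minimal sketch of the convention used throughout the scripts below:
# 1.0 TOP-LEVEL SECTION ----
## 1.1 Subsection ----
### 1.1.1 Sub-subsection ----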
First Tutorial
#################################################################
## tidymodels ##
## 1 build a model ##
## url: https://www.tidymodels.org/start/models/ ##
#################################################################
# 1.0 INTRODUCTION ----
## 1.1 Parsnip pkg + the rest of tidymodels ----
library(tidymodels)
## 1.2 Helper packages ----
library(readr)       # for importing data
library(broom.mixed) # for converting bayesian models to tidy tibbles
library(dotwhisker)  # for visualizing regression results
# 2.0 SEA URCHINS DATA ----
## 2.1 About ----
# https://link.springer.com/article/10.1007/BF00349318
## 2.2 Read-in ----
urchins <-
  # Data were assembled for a tutorial
  # at https://www.flutterbys.com.au/stats/tut/tut7.5a.html
  read_csv("https://tidymodels.org/start/models/urchins.csv") %>%
  # Change the names to be a little more verbose
  setNames(c("food_regime", "initial_volume", "width")) %>%
  # Factors are very helpful for modeling, so we convert one column
  mutate(food_regime = factor(food_regime, levels = c("Initial", "Low", "High")))
## 2.3 Plot ----
ggplot(urchins,
       aes(x = initial_volume,
           y = width,
           group = food_regime,
           col = food_regime)) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE) +
  scale_color_viridis_d(option = "plasma", end = .7)
# 3.0 BUILD AND FIT A MODEL ----
# A standard two-way analysis of variance (ANOVA) model makes sense for this
# dataset because it has both a continuous and a categorical predictor
## 3.1 Designate model ----
lm_mod <-
  linear_reg() %>%
  set_engine("lm")
## 3.2 Train/fit/estimate model ----
lm_fit <-
  lm_mod %>%
  fit(width ~ initial_volume * food_regime, data = urchins)
## 3.3 print tidy ----
tidy(lm_fit)
## 3.4 plot results ----
tidy(lm_fit) %>%
  dwplot(dot_args = list(size = 2, color = "black"),
         whisker_args = list(color = "black"),
         vline = geom_vline(xintercept = 0, colour = "grey50", linetype = 2))
# 4.0 USE A MODEL TO PREDICT ----
## 4.1 Create new points ----
new_points <- expand.grid(initial_volume = 20,
                          food_regime = c("Initial", "Low", "High"))
## 4.2 Fit model to new data points ----
mean_pred <- predict(lm_fit, new_data = new_points)
## 4.3 Create confidence intervals for predictions ----
conf_int_pred <- predict(lm_fit,
                         new_data = new_points,
                         type = "conf_int")
## 4.4 New data: new points + estimates + conf int ----
plot_data <-
  new_points %>%
  bind_cols(mean_pred) %>%
  bind_cols(conf_int_pred)
## 4.5 And then plot . . . ----
ggplot(plot_data, aes(x = food_regime)) +
  geom_point(aes(y = .pred)) +
  geom_errorbar(aes(ymin = .pred_lower,
                    ymax = .pred_upper),
                width = .2) +
  labs(y = "urchin size")
# 5.0 MODEL WITH A DIFFERENT ENGINE ----
## 5.1 Set the prior distribution ----
# couldn't get 'rstanarm' installed
prior_dist <- rstanarm::student_t(df = 1)
set.seed(123)
## 5.2 Make the parsnip model ----
bayes_mod <-
  linear_reg() %>%
  set_engine("stan",
             prior_intercept = prior_dist,
             prior = prior_dist)
## 5.3 Train the model ----
bayes_fit <-
  bayes_mod %>%
  fit(width ~ initial_volume * food_regime, data = urchins)
## 5.4 Print the model ----
print(bayes_fit, digits = 5)
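# Extra step from the original tutorial: broom.mixed::tidy() also returns
# the posterior parameter estimates with uncertainty intervals for the stan fit
tidy(bayes_fit, conf.int = TRUE)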
# 6.0 WHY DOES IT WORK THAT WAY? ----
# the modeling code uses the pipe to pass around the model object:
bayes_mod %>%
  fit(width ~ initial_volume * food_regime, data = urchins)
Second Tutorial
##################################################################
## tidymodels ##
## 2 Preprocess with Recipes ##
## url: https://www.tidymodels.org/start/recipes/ ##
##################################################################
# 1.0 INTRODUCTION ----
## 1.1 tidymodels ----
library(tidymodels)
## 1.2 Helper packages ----
library(nycflights13) # for flight data
library(skimr)        # for variable summaries
# 2.0 THE NEW YORK CITY FLIGHT DATA ----
## 2.1 Set seed ----
set.seed(123)
## 2.2 Load ----
data(flights)
## 2.3 View original ----
skimr::skim(flights)
## 2.4 Change ----
flight_data <-
  flights %>%
  mutate(
    # Convert the arrival delay to a factor
    arr_delay = ifelse(arr_delay >= 30, "late", "on_time"),
    arr_delay = factor(arr_delay),
    # We will use the date (not date-time) in the recipe below
    date = as.Date(time_hour)
  ) %>%
  # Include the weather data
  inner_join(weather, by = c("origin", "time_hour")) %>%
  # Only retain the specific columns we will use
  select(dep_time, flight, origin, dest, air_time, distance,
         carrier, date, arr_delay, time_hour) %>%
  # Exclude missing data
  na.omit() %>%
  # For creating models, it is better to have qualitative columns
  # encoded as factors (instead of character strings)
  mutate_if(is.character, as.factor)
## 2.5 View changes ----
glimpse(flight_data)
# 3.0 DATA SPLITTING ----
## 3.1 Set seed ----
set.seed(555)
## 3.2 Split ----
data_split <- initial_split(flight_data, prop = 3/4)
## 3.3 Training & Testing ----
train_data <- training(data_split)
test_data <- testing(data_split)
# 4.0 CREATE RECIPE AND ROLES ----
## 4.1 Original recipe ----
flights_rec <-
  recipe(arr_delay ~ ., data = train_data)
## 4.2 New recipe ----
# two columns designated as 'ID' rather than predictors
flights_rec <-
  recipe(arr_delay ~ ., data = train_data) %>%
  update_role(flight, time_hour, new_role = "ID")
summary(flights_rec)
# 5.0 FEATURE ENGINEERING ----
flights_rec <-
  recipe(arr_delay ~ ., data = train_data) %>%
  update_role(flight, time_hour, new_role = "ID") %>%
  step_date(date, features = c("dow", "month")) %>%
  step_holiday(date, holidays = timeDate::listHolidays("US")) %>%
  step_rm(date) %>%
  step_dummy(all_nominal(), -all_outcomes()) %>%
  step_zv(all_predictors())
# 6.0 FIT A MODEL WITH A RECIPE ----
## 6.1 Specify model ----
lr_mod <-
  logistic_reg() %>%
  set_engine("glm")
## 6.2 Specify workflow ----
flights_wflow <-
  workflow() %>%
  add_model(lr_mod) %>%
  add_recipe(flights_rec)
flights_wflow
## 6.3 Fit model ----
flights_fit <-
  flights_wflow %>%
  fit(data = train_data)
## 6.4 Extract results ----
flights_fit %>%
  pull_workflow_fit() %>%
  tidy()
# 7.0 USE A TRAINED WORKFLOW TO PREDICT ----
## 7.1 Predict -- returns factor ----
predict(flights_fit, test_data)
## 7.2 Predict -- return probability ----
flights_pred <-
  predict(flights_fit, test_data, type = "prob") %>%
  bind_cols(test_data %>% select(arr_delay, time_hour, flight))
flights_pred
## 7.3 Plot -- library('yardstick') ----
flights_pred %>%
  roc_curve(truth = arr_delay, .pred_late) %>%
  autoplot()
## 7.4 Area under ROC curve ----
flights_pred %>%
  roc_auc(truth = arr_delay, .pred_late)
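## 7.5 Accuracy (not in the tutorial) ----
# A hedged extra: yardstick's accuracy() works the same way on the hard
# class predictions from 7.1
predict(flights_fit, test_data) %>%
  bind_cols(test_data %>% select(arr_delay)) %>%
  accuracy(truth = arr_delay, estimate = .pred_class)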
Third Tutorial
#################################################################
## tidymodels ##
## 3 Evaluate Model with Resampling ##
## url: https://www.tidymodels.org/start/resampling/ ##
#################################################################
# 1.0 INTRODUCTION ----
# Resampling measures how well a model predicts new data
# predict image segmentation quality
## 1.1 Load tidymodels ----
library(tidymodels) # for the rsample package, along with the rest of tidymodels
## 1.2 Load helper packages ----
library(modeldata) # for the cells data
# 2.0 THE CELL IMAGE DATA ----
## 2.1 Load data ----
data(cells, package = "modeldata")
## 2.2 Outcome variable is 'class' ----
# PS = "poorly segmented", WS = "well segmented"
cells %>%
  count(class) %>%
  mutate(prop = n/sum(n))
# 3.0 DATA SPLITTING ----
# The function rsample::initial_split() takes the original data and saves
# the information on how to make the partitions. In the original analysis,
# the authors made their own training/test set and that information is
# contained in the column "case". To demonstrate how to make a split, we'll
# remove this column before we make our own split:
set.seed(123)
cell_split <- rsample::initial_split(cells %>% select(-case),
                                     strata = class)
# Here we used the strata argument, which conducts a stratified split. This
# ensures that, despite the imbalance we noticed in our class variable, our
# training and test data sets will keep roughly the same proportions of poorly
# and well-segmented cells as in the original data. After the initial_split,
# the training() and testing() functions return the actual data sets.
cell_train <- training(cell_split)
cell_test <- testing(cell_split)
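# From the original tutorial: verify that stratification kept the class
# proportions roughly equal in the training and test sets
cell_train %>%
  count(class) %>%
  mutate(prop = n/sum(n))
cell_test %>%
  count(class) %>%
  mutate(prop = n/sum(n))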
# 4.0 CREATE MODEL ----
# One of the benefits of a random forest model is that it is very low maintenance;
# it requires very little preprocessing of the data and the default parameters
# tend to give reasonable results. For that reason, we won't create a recipe for
# the cells data.
rf_mod <-
  rand_forest(trees = 1000) %>%
  set_engine("ranger") %>%
  set_mode("classification")
## 4.1 Fit model ----
# This new rf_fit object is the fitted model, trained on the training data set
set.seed(234)
rf_fit <-
  rf_mod %>%
  fit(class ~ ., data = cell_train)
# 5.0 ESTIMATING PERFORMANCE ----
# Performance can be measured by overall classification accuracy and by the
# area under the Receiver Operating Characteristic (ROC) curve. The yardstick
# package has functions for computing both of these measures: accuracy() and
# roc_auc(). Don't use the training set to compute them --
# you must resample the training set to get reliable estimates.
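# For illustration, here is the ill-advised approach the tutorial warns about
# (this block is taken from the original tutorial): predicting the training
# set itself yields overly optimistic metrics
rf_training_pred <-
  predict(rf_fit, cell_train) %>%
  bind_cols(predict(rf_fit, cell_train, type = "prob")) %>%
  bind_cols(cell_train %>% select(class))
rf_training_pred %>% # training set ROC AUC -- too good to be true
  roc_auc(truth = class, .pred_PS)
rf_training_pred %>% # training set accuracy -- too good to be true
  accuracy(truth = class, .pred_class)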
# 6.0 FIT A MODEL WITH RESAMPLING ----
## 6.1 Fit model ----
set.seed(345)
folds <- vfold_cv(cell_train, v = 10)
folds
rf_wf <-
  workflow() %>%
  add_model(rf_mod) %>%
  add_formula(class ~ .)
set.seed(456)
rf_fit_rs <-
  rf_wf %>%
  fit_resamples(folds)
## 6.2 Collect metrics ----
collect_metrics(rf_fit_rs)
# 7.0 CONCLUSION ----
# Think about these values we now have for accuracy and AUC. These performance
# metrics are now more realistic (i.e. lower) than our ill-advised first attempt
# at computing performance metrics in the section above.
rf_testing_pred <- # the test set is still appropriate for a final check
  predict(rf_fit, cell_test) %>%
  bind_cols(predict(rf_fit, cell_test, type = "prob")) %>%
  bind_cols(cell_test %>% select(class))
rf_testing_pred %>% # test set ROC AUC
  roc_auc(truth = class, .pred_PS)
rf_testing_pred %>% # test set accuracy
  accuracy(truth = class, .pred_class)
Fourth Tutorial
#################################################################
## tidymodels ##
## 4 Tune model parameters ##
## url: https://www.tidymodels.org/start/tuning/ ##
#################################################################
# 1.0 INTRODUCTION ----
## 1.1 hyperparameters ----
### 1.1.1 mtry() ----
### 1.1.2 learn_rate() ----
library(tidymodels) # for the tune package, along with the rest of tidymodels
# Helper packages
library(modeldata) # for the cells data
library(vip)       # for variable importance plots
# 2.0 THE CELL IMAGE DATA, REVISITED ----
# labeled by experts as well-segmented (WS) or poorly segmented (PS).
data(cells, package = "modeldata")
# 3.0 PREDICTING IMAGE SEGMENTATION, BUT BETTER ----
# Random forest models are a tree-based ensemble method, and typically
# perform well with default hyperparameters. However, the accuracy of
# some other tree-based models, such as boosted tree models or decision
# tree models, can be sensitive to the values of hyperparameters. In
# this article, we will train a decision tree model.
## 3.1 cost_complexity() ----
# adds a cost, or penalty, to error rates of more complex trees
## 3.2 tree_depth() ----
# helps by stopping our tree from growing after it reaches
# a certain depth.
set.seed(123)
cell_split <- initial_split(cells %>% select(-case),
                            strata = class)
cell_train <- training(cell_split)
cell_test <- testing(cell_split)
# 4.0 TUNING HYPERPARAMETERS ----
## 4.1 identify which hyperparameters we plan to tune ----
tune_spec <-
  decision_tree(
    cost_complexity = tune(),
    tree_depth = tune()
  ) %>%
  set_engine("rpart") %>%
  set_mode("classification")
tune_spec
## 4.2 dials::grid_regular() ----
tree_grid <- grid_regular(cost_complexity(),
                          tree_depth(),
                          levels = 5)
tree_grid
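# From the original tutorial: the grid holds 5 x 5 = 25 candidate
# combinations; count() confirms 5 candidates for each tree_depth value
tree_grid %>%
  count(tree_depth)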
## 4.3 Cross validation folds ----
set.seed(234)
cell_folds <- vfold_cv(cell_train)
# 5.0 MODEL TUNING WITH A GRID ----
## 5.1 workflow ----
set.seed(345)
tree_wf <- workflow() %>%
  add_model(tune_spec) %>%
  add_formula(class ~ .)
## 5.2 add tuning grid ----
tree_res <-
  tree_wf %>%
  tune_grid(
    resamples = cell_folds,
    grid = tree_grid
  )
tree_res
## 5.3 Plot ----
tree_res %>%
  collect_metrics() %>%
  mutate(tree_depth = factor(tree_depth)) %>%
  ggplot(aes(cost_complexity, mean, color = tree_depth)) +
  geom_line(size = 1.5, alpha = 0.6) +
  geom_point(size = 2) +
  facet_wrap(~ .metric, scales = "free", nrow = 2) +
  scale_x_log10(labels = scales::label_number()) +
  scale_color_viridis_d(option = "plasma", begin = .9, end = 0)
## 5.4 Select Best ----
best_tree <- tree_res %>%
  select_best("roc_auc")
best_tree
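# From the original tutorial: show_best() lists the top five candidate
# combinations ranked by ROC AUC
tree_res %>%
  show_best("roc_auc")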
# 6.0 FINALIZING OUR MODEL ----
final_wf <-
  tree_wf %>%
  finalize_workflow(best_tree)
final_wf
# 7.0 EXPLORING RESULTS ----
## 7.1 fit final model to training data ----
final_tree <-
  final_wf %>%
  fit(data = cell_train)
final_tree
## 7.2 variable importance ----
library(vip)
final_tree %>%
  pull_workflow_fit() %>%
  vip(geom = "point")
# 8.0 LAST FIT ----
## 8.1 test data ----
final_fit <-
  final_wf %>%
  last_fit(cell_split)
## 8.2 collect metrics ----
final_fit %>%
  collect_metrics()
## 8.3 plot roc curve ----
final_fit %>%
  collect_predictions() %>%
  roc_curve(class, .pred_PS) %>%
  autoplot()
## 8.4 Other hyperparameters? ----
args(decision_tree)
## 8.5 More models: https://www.tidymodels.org/find/parsnip/#models ----
Fifth Tutorial
#################################################################
## tidymodels ##
## 5 A predictive modeling case study ##
## url: https://www.tidymodels.org/start/case-study/ ##
#################################################################
# 1.0 INTRODUCTION ----
## 1.1 General ----
library(tidymodels)
## 1.2 Helper packages ----
library(readr) # for importing data
library(vip)   # for variable importance plots
# 2.0 HOTEL BOOKINGS DATA -- STAYS ONLY -- PREDICT CHILDREN ----
## 2.1 Read-in ----
hotels <-
  read_csv('https://tidymodels.org/start/case-study/hotels.csv') %>%
  mutate_if(is.character, as.factor)
## 2.2 View ----
glimpse(hotels)
## 2.3 Outcome variable ----
hotels %>%
  count(children) %>%
  mutate(prop = n/sum(n))
# 8.3% of reservations
# 3.0 DATA SPLITTING & RESAMPLING ----
## 3.1 Split into stratified random sample ----
set.seed(123)
splits <- initial_split(hotels, strata = children)
hotel_other <- training(splits)
hotel_test <- testing(splits)
## 3.2 training set proportions by children ----
hotel_other %>%
  count(children) %>%
  mutate(prop = n/sum(n))
## 3.3 test set proportions by children ----
hotel_test %>%
  count(children) %>%
  mutate(prop = n/sum(n))
## 3.4 validation_split() ----
set.seed(234)
val_set <- validation_split(hotel_other,
                            strata = children,
                            prop = 0.80)
# 4.0 FIRST MODEL: PENALIZED LOGISTIC REGRESSION ----
## 4.1 Build the model ----
# tune() is a placeholder for a value chosen later
# mixture = 1 gives a pure lasso penalty, which can remove irrelevant predictors
lr_mod <-
  logistic_reg(penalty = tune(), mixture = 1) %>%
  set_engine("glmnet")
## 4.2 Create recipe ----
holidays <- c("AllSouls", "AshWednesday", "ChristmasEve", "Easter",
              "ChristmasDay", "GoodFriday", "NewYearsDay", "PalmSunday")
lr_recipe <-
  recipe(children ~ ., data = hotel_other) %>%
  step_date(arrival_date) %>%
  step_holiday(arrival_date, holidays = holidays) %>%
  step_rm(arrival_date) %>%
  step_dummy(all_nominal(), -all_outcomes()) %>%
  step_zv(all_predictors()) %>%
  step_normalize(all_predictors())
## 4.3 Create workflow ----
lr_workflow <-
  workflow() %>%
  add_model(lr_mod) %>%
  add_recipe(lr_recipe)
## 4.4 Create grid ----
lr_reg_grid <- tibble(penalty = 10^seq(-4, -1, length.out = 30))
lr_reg_grid %>% top_n(-5) # lowest penalty values
lr_reg_grid %>% top_n(5)  # highest penalty values
## 4.5 Train & Tune ----
lr_res <-
  lr_workflow %>%
  tune_grid(val_set,
            grid = lr_reg_grid,
            control = control_grid(save_pred = TRUE),
            metrics = metric_set(roc_auc))
### 4.5.1 Plot ----
lr_plot <-
  lr_res %>%
  collect_metrics() %>%
  ggplot(aes(x = penalty, y = mean)) +
  geom_point() +
  geom_line() +
  ylab("Area under the ROC Curve") +
  scale_x_log10(labels = scales::label_number())
lr_plot
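### 4.5.2 Keep the best ROC data for later comparison ----
# Filled in from the original tutorial (the plot in 5.4.7 below needs
# lr_auc): pick a good penalty value, then save its ROC curve data
lr_best <-
  lr_res %>%
  collect_metrics() %>%
  arrange(penalty) %>%
  slice(12)
lr_auc <-
  lr_res %>%
  collect_predictions(parameters = lr_best) %>%
  roc_curve(children, .pred_children) %>%
  mutate(model = "Logistic Regression")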
# 5.0 SECOND MODEL: TREE-BASED ENSEMBLE ----
# An effective and low-maintenance modeling technique is a random forest.
# Tree-based models require very little preprocessing and can handle many types
# of predictors (sparse, skewed, continuous, categorical, etc.).
## 5.1 Build model & reduce training time ----
# The tune package can do parallel processing for you, and allows users
# to use multiple cores or separate machines to fit models.
### 5.1.1 Detect cores ----
cores <- parallel::detectCores()
cores
### 5.1.2 Build model ----
rf_mod <-
  rand_forest(mtry = tune(), min_n = tune(), trees = 1000) %>%
  # tune() is a placeholder for later
  set_engine("ranger", num.threads = cores) %>%
  set_mode("classification")
### 5.1.3 CAUTION: Don't set cores except for random forest ----
## 5.2 Create Recipe ----
# Unlike penalized logistic regression models, random forest models do
# not require dummy or normalized predictor variables.
rf_recipe <-
  recipe(children ~ ., data = hotel_other) %>%
  step_date(arrival_date) %>%
  step_holiday(arrival_date) %>%
  step_rm(arrival_date)
## 5.3 Create Workflow ----
rf_workflow <-
  workflow() %>%
  add_model(rf_mod) %>%
  add_recipe(rf_recipe)
## 5.4 Train and Tune Model ----
### 5.4.1 Show what will be tuned ----
rf_mod %>%
  parameters()
### 5.4.2 space-filling grid ----
set.seed(345)
rf_res <-
  rf_workflow %>%
  tune_grid(val_set,
            grid = 25,
            control = control_grid(save_pred = TRUE),
            metrics = metric_set(roc_auc))
### 5.4.3 Show the best ----
rf_res %>%
  show_best(metric = "roc_auc")
### 5.4.4 Plot ----
# The range of the y-axis indicates that the model is
# very robust to the choice of these parameter values -- all but
# one of the ROC AUC values are greater than 0.90.
autoplot(rf_res)
### 5.4.5 Select best ----
rf_best <-
  rf_res %>%
  select_best(metric = "roc_auc")
rf_best
### 5.4.6 Filter predictions to the best model ----
rf_auc <-
  rf_res %>%
  collect_predictions(parameters = rf_best) %>%
  roc_curve(children, .pred_children) %>%
  mutate(model = "Random Forest")
### 5.4.7 Plot both models' ROC curves ----
bind_rows(rf_auc, lr_auc) %>%
  ggplot(aes(x = 1 - specificity, y = sensitivity, col = model)) +
  geom_path(lwd = 1.5, alpha = 0.8) +
  geom_abline(lty = 3) +
  coord_equal() +
  scale_color_viridis_d(option = "plasma", end = .6)
# note: lr_auc was created in 4.5.2 above
# 6.0 THE LAST FIT ----
# build the parsnip model object again from scratch,
# take our best hyperparameter values from our random forest model,
# and set a new argument: importance = "impurity"
## 6.1 last model ----
last_rf_mod <-
  rand_forest(mtry = 8, min_n = 7, trees = 1000) %>%
  set_engine("ranger", num.threads = cores, importance = "impurity") %>%
  set_mode("classification")
## 6.2 last workflow ----
last_rf_workflow <-
  rf_workflow %>%
  update_model(last_rf_mod)
## 6.3 last fit ----
set.seed(345)
last_rf_fit <-
  last_rf_workflow %>%
  last_fit(splits)
## 6.4 evaluate model ----
last_rf_fit %>%
  collect_metrics()
## 6.5 review variable importance ----
last_rf_fit %>%
  pluck(".workflow", 1) %>%
  pull_workflow_fit() %>%
  vip(num_features = 20)
## 6.6 last roc ----
# similar to the validation set results: a good predictor on new data
last_rf_fit %>%
  collect_predictions() %>%
  roc_curve(children, .pred_children) %>%
  autoplot()
# 7.0 RESOURCES ----
# Kuhn & Silge: https://www.tmwr.org
Conclusion
Everybody learns in their own way. Tutorials are really helpful, and the tidymodels tutorials are great. After finishing those up, I hope to spend some time with the book, Tidy Modeling with R, and then work through some more examples. No matter how many examples I try, I never quite feel comfortable modeling data. Converting the narrative format into R scripts should make it easier to copy, paste, and step through the code on your own. Enjoy.
Acknowledgements
This blog post was made possible thanks to:
Tidymodels: tidy machine learning in R by Rebecca Barter
Disclaimer
The views, analysis, and conclusions presented within this post are the author's alone and not those of any other person, organization, or government entity. While I have made every reasonable effort to ensure that the information in this article is correct, it may nonetheless contain errors, inaccuracies, and inconsistencies. It is a working document subject to revision without notice as additional information becomes available. Any liability is disclaimed as to any party for any loss, damage, or disruption caused by errors or omissions, whether such errors or omissions result from negligence, accident, or any other cause. The author(s) received no financial support for the research, authorship, and/or publication of this article.
Reproducibility
─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
setting value
version R version 3.6.3 (2020-02-29)
os macOS Catalina 10.15.7
system x86_64, darwin15.6.0
ui X11
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz America/Chicago
date 2021-05-30
─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
package * version date lib source
assertthat 0.2.1 2019-03-21 [1] CRAN (R 3.6.0)
blogdown * 1.3 2021-04-14 [1] CRAN (R 3.6.2)
bookdown 0.21 2020-10-13 [1] CRAN (R 3.6.3)
bslib 0.2.4 2021-01-25 [1] CRAN (R 3.6.2)
cachem 1.0.4 2021-02-13 [1] CRAN (R 3.6.2)
callr 3.5.1 2020-10-13 [1] CRAN (R 3.6.2)
cli 2.5.0 2021-04-26 [1] CRAN (R 3.6.2)
codetools 0.2-18 2020-11-04 [1] CRAN (R 3.6.2)
colorspace 2.0-1 2021-05-04 [1] CRAN (R 3.6.2)
crayon 1.4.1 2021-02-08 [1] CRAN (R 3.6.2)
DBI 1.1.1 2021-01-15 [1] CRAN (R 3.6.2)
desc 1.3.0 2021-03-05 [1] CRAN (R 3.6.3)
devtools * 2.3.2 2020-09-18 [1] CRAN (R 3.6.2)
digest 0.6.27 2020-10-24 [1] CRAN (R 3.6.2)
dplyr 1.0.5 2021-03-05 [1] CRAN (R 3.6.3)
ellipsis 0.3.2 2021-04-29 [1] CRAN (R 3.6.2)
evaluate 0.14 2019-05-28 [1] CRAN (R 3.6.0)
fansi 0.4.2 2021-01-15 [1] CRAN (R 3.6.2)
farver 2.1.0 2021-02-28 [1] CRAN (R 3.6.3)
fastmap 1.1.0 2021-01-25 [1] CRAN (R 3.6.2)
fs 1.5.0 2020-07-31 [1] CRAN (R 3.6.2)
generics 0.1.0 2020-10-31 [1] CRAN (R 3.6.2)
ggplot2 * 3.3.3 2020-12-30 [1] CRAN (R 3.6.2)
ggthemes * 4.2.4 2021-01-20 [1] CRAN (R 3.6.2)
glue 1.4.2 2020-08-27 [1] CRAN (R 3.6.2)
gtable 0.3.0 2019-03-25 [1] CRAN (R 3.6.0)
highr 0.8 2019-03-20 [1] CRAN (R 3.6.0)
htmltools 0.5.1.1 2021-01-22 [1] CRAN (R 3.6.2)
jquerylib 0.1.3 2020-12-17 [1] CRAN (R 3.6.2)
jsonlite 1.7.2 2020-12-09 [1] CRAN (R 3.6.2)
knitr 1.32 2021-04-14 [1] CRAN (R 3.6.2)
labeling 0.4.2 2020-10-20 [1] CRAN (R 3.6.2)
lifecycle 1.0.0 2021-02-15 [1] CRAN (R 3.6.2)
magrittr 2.0.1 2020-11-17 [1] CRAN (R 3.6.2)
memoise 2.0.0 2021-01-26 [1] CRAN (R 3.6.2)
munsell 0.5.0 2018-06-12 [1] CRAN (R 3.6.0)
pillar 1.6.0 2021-04-13 [1] CRAN (R 3.6.2)
pkgbuild 1.2.0 2020-12-15 [1] CRAN (R 3.6.2)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 3.6.0)
pkgload 1.2.0 2021-02-23 [1] CRAN (R 3.6.3)
prettyunits 1.1.1 2020-01-24 [1] CRAN (R 3.6.0)
processx 3.4.5 2020-11-30 [1] CRAN (R 3.6.2)
ps 1.6.0 2021-02-28 [1] CRAN (R 3.6.3)
purrr 0.3.4 2020-04-17 [1] CRAN (R 3.6.2)
R6 2.5.0 2020-10-28 [1] CRAN (R 3.6.2)
remotes 2.3.0 2021-04-01 [1] CRAN (R 3.6.2)
rlang 0.4.11 2021-04-30 [1] CRAN (R 3.6.2)
rmarkdown 2.7 2021-02-19 [1] CRAN (R 3.6.3)
rprojroot 2.0.2 2020-11-15 [1] CRAN (R 3.6.2)
sass 0.3.1 2021-01-24 [1] CRAN (R 3.6.2)
scales 1.1.1 2020-05-11 [1] CRAN (R 3.6.2)
sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.6.0)
stringi 1.5.3 2020-09-09 [1] CRAN (R 3.6.2)
stringr 1.4.0 2019-02-10 [1] CRAN (R 3.6.0)
testthat 3.0.2 2021-02-14 [1] CRAN (R 3.6.2)
tibble 3.1.1 2021-04-18 [1] CRAN (R 3.6.2)
tidyselect 1.1.0 2020-05-11 [1] CRAN (R 3.6.2)
usethis * 2.0.1 2021-02-10 [1] CRAN (R 3.6.2)
utf8 1.2.1 2021-03-12 [1] CRAN (R 3.6.2)
vctrs 0.3.8 2021-04-29 [1] CRAN (R 3.6.2)
withr 2.4.2 2021-04-18 [1] CRAN (R 3.6.2)
xfun 0.22 2021-03-11 [1] CRAN (R 3.6.2)
yaml 2.2.1 2020-02-01 [1] CRAN (R 3.6.0)
[1] /Library/Frameworks/R.framework/Versions/3.6/Resources/library