Changes from 16 commits
13 changes: 13 additions & 0 deletions .github/workflows/bookdown.yaml
Original file line number Diff line number Diff line change
@@ -36,6 +36,19 @@ jobs:
- name: Build site
run: Rscript -e 'bookdown::render_book("index.Rmd", quiet = TRUE)'

+# Add these artifact upload steps
+# - name: Upload build artifacts on failure
+#   if: failure()
+#   uses: actions/upload-artifact@v4
+#   with:
+#     name: build-artifacts-on-failure
+#     path: |
+#       ./_book/
+#       RData/
+#       *.log
+#       *.Rmd
+#     retention-days: 7

- name: Deploy to Netlify
if: contains(env.isExtPR, 'false')
id: netlify-deploy
20 changes: 10 additions & 10 deletions 01-software-modeling.Rmd
@@ -73,8 +73,8 @@ resid_cols <- RColorBrewer::brewer.pal(8, "Set1")[1:2]

# Red is where intensity is higher than expected
plm_plot <-
-plm_resids %>%
-mutate(sign = ifelse(Intensity < 0, "low", "high")) %>%
+plm_resids |>
+mutate(sign = ifelse(Intensity < 0, "low", "high")) |>
ggplot(aes(x = x, y = y, fill = sign)) +
geom_tile(show.legend = FALSE) +
facet_wrap(~Sample) +
@@ -257,23 +257,23 @@ monolog <-
)
if (knitr::is_html_output()) {
tab <-
-monolog %>%
-dplyr::select(Thoughts, Activity) %>%
+monolog |>
+dplyr::select(Thoughts, Activity) |>
kable(
caption = "Hypothetical inner monologue of a model developer.",
label = "inner-monologue"
-) %>%
-kable_styling() %>%
-column_spec(2, width = "25%") %>%
+) |>
+kable_styling() |>
+column_spec(2, width = "25%") |>
column_spec(1, width = "75%", italic = TRUE)
} else {
tab <-
-monolog %>%
-dplyr::select(Thoughts, Activity) %>%
+monolog |>
+dplyr::select(Thoughts, Activity) |>
kable(
caption = "Hypothetical inner monologue of a model developer.",
label = "inner-monologue"
-) %>%
+) |>
kable_styling()
}
tab
22 changes: 11 additions & 11 deletions 02-tidyverse.Rmd
@@ -79,7 +79,7 @@ One important feature in the tibble produced by `r pkg(rsample)` is that the `sp

### Design for the pipe and functional programming

-The `r pkg(magrittr)` pipe operator (`%>%`) is a tool for chaining together a sequence of R functions.^[In R 4.1.0, a native pipe operator `|>` was introduced as well. In this book, we use the `r pkg(magrittr)` pipe since users on older versions of R will not have the new native pipe.] To demonstrate, consider the following commands that sort a data frame and then retain the first 10 rows:
+The pipe operator (`|>`) is a tool for chaining together a sequence of R functions.^[The native pipe operator `|>` was introduced in R 4.1.0. Earlier versions of this book used the `r pkg(magrittr)` pipe (`%>%`), which behaves similarly for the code shown here.] To demonstrate, consider the following commands that sort a data frame and then retain the first 10 rows:

```{r tidyverse-no-pipe, eval = FALSE}
small_mtcars <- arrange(mtcars, gear)
@@ -93,8 +93,8 @@ The pipe operator substitutes the value of the left-hand side of the operator as the first argument to the right-hand side

```{r tidyverse-pipe, eval = FALSE}
small_mtcars <-
-mtcars %>%
-arrange(gear) %>%
+mtcars |>
+arrange(gear) |>
slice(1:10)
```
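To make the substitution rule concrete, here is a small sketch (an editorial illustration, assuming `r pkg(dplyr)` is loaded) showing that the piped form and the nested form produce the same result:

```r
library(dplyr)

# The pipe passes each result as the first argument of the next call...
piped <- mtcars |>
  arrange(gear) |>
  slice(1:10)

# ...so the chain is equivalent to nesting the calls inside one another
nested <- slice(arrange(mtcars, gear), 1:10)

identical(piped, nested)
```

The main ergonomic difference is that the nested version must be read inside-out, while the piped version reads top-to-bottom in the order the operations occur.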

@@ -172,7 +172,7 @@ head(log_ratios)
The `map()` functions also allow for temporary, anonymous functions defined using the tilde character. The argument values are `.x` and `.y` for `map2()`:

```{r map2-inline}
-map2_dbl(mtcars$mpg, mtcars$wt, ~ log(.x/.y)) %>%
+map2_dbl(mtcars$mpg, mtcars$wt, ~ log(.x/.y)) |>
head()
```
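R 4.1.0 also introduced a native anonymous function shorthand, `\(x)`, alongside the native pipe. As a sketch (an editorial aside, not part of the book's chunk above), the tilde-based lambda can be written without `r pkg(purrr)`'s formula syntax:

```r
library(purrr)

# Same ratio computation as the tilde version, using base R's
# lambda shorthand with descriptive argument names
map2_dbl(mtcars$mpg, mtcars$wt, \(mpg, wt) log(mpg / wt)) |>
  head()
```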

@@ -228,8 +228,8 @@ train_cols <-
rides = col_double()
)
num_combos <-
-read_delim(url, delim = ",", col_types = train_cols) %>%
-distinct(date, stationname) %>%
+read_delim(url, delim = ",", col_types = train_cols) |>
+distinct(date, stationname) |>
nrow()
```

@@ -261,19 +261,19 @@ url <- "https://data.cityofchicago.org/api/views/5neh-572f/rows.csv?accessType=D

all_stations <-
# Step 1: Read in the data.
-read_csv(url) %>%
+read_csv(url) |>
# Step 2: filter columns and rename stationname
-dplyr::select(station = stationname, date, rides) %>%
+dplyr::select(station = stationname, date, rides) |>
# Step 3: Convert the character date field to a date encoding.
# Also, put the data in units of 1K rides
-mutate(date = mdy(date), rides = rides / 1000) %>%
+mutate(date = mdy(date), rides = rides / 1000) |>
# Step 4: Summarize the multiple records using the maximum.
-group_by(date, station) %>%
+group_by(date, station) |>
summarize(rides = max(rides), .groups = "drop")
```

This pipeline of operations illustrates why the tidyverse is popular. Each transformation uses a simple, easy-to-understand function, and the series is bundled together in a streamlined, readable way. The focus is on how the user interacts with the software. This approach enables more people to learn R and achieve their analysis goals, and adopting these same principles for modeling in R has the same benefits.

## Chapter Summary

-This chapter introduced the tidyverse, with a focus on applications for modeling and how tidyverse design principles inform the tidymodels framework. Think of the tidymodels framework as applying tidyverse principles to the domain of building models. We described differences in conventions between the tidyverse and base R, and introduced two important components of the tidyverse system, tibbles and the pipe operator `%>%`. Data cleaning and processing can feel mundane at times, but these tasks are important for modeling in the real world; we illustrated how to use tibbles, the pipe, and tidyverse functions in an example data import and processing exercise.
+This chapter introduced the tidyverse, with a focus on applications for modeling and how tidyverse design principles inform the tidymodels framework. Think of the tidymodels framework as applying tidyverse principles to the domain of building models. We described differences in conventions between the tidyverse and base R, and introduced two important components used throughout tidyverse code, tibbles and the pipe operator `|>`. Data cleaning and processing can feel mundane at times, but these tasks are important for modeling in the real world; we illustrated how to use tibbles, the pipe, and tidyverse functions in an example data import and processing exercise.
24 changes: 12 additions & 12 deletions 03-base-r.Rmd
@@ -245,14 +245,14 @@ prob_tbl <-
"pamr.train()" , "pamr" , 'pamr.predict(object, type = "posterior")'
)

-prob_tbl %>%
+prob_tbl |>
kable(
caption = "Heterogeneous argument names for different modeling functions.",
label = "probability-args",
escape = FALSE
-) %>%
-kable_styling(full_width = FALSE) %>%
-column_spec(1, monospace = ifelse(prob_tbl$Function == "various", FALSE, TRUE)) %>%
+) |>
+kable_styling(full_width = FALSE) |>
+column_spec(1, monospace = ifelse(prob_tbl$Function == "various", FALSE, TRUE)) |>
column_spec(3, monospace = TRUE)
```

@@ -294,7 +294,7 @@ A few existing R packages provide a unified interface to harmonize these heterog
The `broom::tidy()` function, which we use throughout this book, is another tool for standardizing the structure of R objects. It can return many types of R objects in a more usable format. For example, suppose that predictors are being screened based on their correlation to the outcome column. Using `purrr::map()`, the results from `cor.test()` can be returned in a list for each predictor:

```{r base-r-corr-list}
-corr_res <- map(mtcars %>% select(-mpg), cor.test, y = mtcars$mpg)
+corr_res <- map(mtcars |> select(-mpg), cor.test, y = mtcars$mpg)

# The first of ten results in the vector:
corr_res[[1]]
@@ -311,9 +311,9 @@ tidy(corr_res[[1]])
These results can be "stacked" and added to a `ggplot()`, as shown in Figure \@ref(fig:corr-plot).

```{r base-r-corr-plot, eval=FALSE}
-corr_res %>%
+corr_res |>
# Convert each to a tidy format; `map_dfr()` stacks the data frames
-map_dfr(tidy, .id = "predictor") %>%
+map_dfr(tidy, .id = "predictor") |>
ggplot(aes(x = fct_reorder(predictor, estimate))) +
geom_point(aes(y = estimate)) +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = .1) +
@@ -334,7 +334,7 @@ R modeling functions from the core language or other R packages can be used in c

```{r base-r-by-species-split}
split_by_species <-
-crickets %>%
+crickets |>
group_nest(species)
split_by_species
```
Expand All @@ -343,17 +343,17 @@ The `data` column contains the `rate` and `temp` columns from `crickets` in a _l

```{r base-r-species-models}
model_by_species <-
-split_by_species %>%
+split_by_species |>
mutate(model = map(data, ~ lm(rate ~ temp, data = .x)))
model_by_species
```

To collect the coefficients for each of these models, use `broom::tidy()` to convert them to a consistent data frame format so that they can be unnested:

```{r base-r-species-coefs}
-model_by_species %>%
-mutate(coef = map(model, tidy)) %>%
-select(species, coef) %>%
+model_by_species |>
+mutate(coef = map(model, tidy)) |>
+select(species, coef) |>
unnest(cols = c(coef))
```

4 changes: 2 additions & 2 deletions 04-ames.Rmd
@@ -89,7 +89,7 @@ The units of the model coefficients might be more difficult to interpret, as wil
Despite these drawbacks, the models used in this book use the log transformation for this outcome. _From this point on_, the outcome column is prelogged in the `ames` data frame:

```{r ames-log}
-ames <- ames %>% mutate(Sale_Price = log10(Sale_Price))
+ames <- ames |> mutate(Sale_Price = log10(Sale_Price))
```

Another important aspect of these data for our modeling is their geographic locations. This spatial information is contained in the data in two ways: a qualitative `Neighborhood` label as well as quantitative longitude and latitude data. To visualize the spatial information, let's use both together to plot the data on a map in Figure \@ref(fig:ames-map).
@@ -184,5 +184,5 @@ The important code for preparing the Ames data set that we will carry forward in
```{r ames-summary, eval = FALSE}
library(tidymodels)
data(ames)
-ames <- ames %>% mutate(Sale_Price = log10(Sale_Price))
+ames <- ames |> mutate(Sale_Price = log10(Sale_Price))
```
4 changes: 2 additions & 2 deletions 05-data-spending.Rmd
@@ -63,7 +63,7 @@ Simple random sampling is appropriate in many cases but there are exceptions. Wh
#| fig.alt = "The distribution of the sale price (in log units) for the Ames housing data. The vertical lines indicate the quartiles of the data."

sale_dens <-
-density(ames$Sale_Price, n = 2^10) %>%
+density(ames$Sale_Price, n = 2^10) |>
tidy()
quartiles <- quantile(ames$Sale_Price, probs = c(1:3)/4)
quartiles <- tibble(prob = (1:3/4), value = unname(quartiles))
@@ -174,7 +174,7 @@ At this checkpoint, the important code snippets for preparing and splitting are:
```{r splitting-summary, eval = FALSE}
library(tidymodels)
data(ames)
-ames <- ames %>% mutate(Sale_Price = log10(Sale_Price))
+ames <- ames |> mutate(Sale_Price = log10(Sale_Price))

set.seed(502)
ames_split <- initial_split(ames, prop = 0.80, strata = Sale_Price)