amices · stefvanbuuren · Apr 5, 2025 · Apr 5, 2025 · Apr 6, 2025 · Apr 6, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: mice
 Type: Package
-Version: 3.17.3
+Version: 3.18.0.9000
 Title: Multivariate Imputation by Chained Equations
 Date: 2025-3-28
 Authors@R: c(person("Stef", "van Buuren", role = c("aut","cre"),
@@ -63,6 +63,7 @@ Imports:
 Suggests:
     broom.mixed,
     future, 
+    future.apply,
     furrr,
     haven,
     knitr,

diff --git a/NAMESPACE b/NAMESPACE
@@ -163,6 +163,7 @@ export(pool.syn)
 export(pool.table)
 export(quickpred)
 export(rbind)
+export(record.event)
 export(squeeze)
 export(stripplot)
 export(supports.transparent)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,25 @@
+# mice 3.18.0.9000
+
+> **Experimental release**: Native support for parallel imputation.
+
+- The `mice()` function now supports parallel execution of imputations via the new `parallel = TRUE` argument. When enabled, instead of sequentially calculating `m` imputations at a given iteration, the `m` chains are distributed across available CPU cores using the `future` and `future.apply` frameworks. 
+- Parallel imputation may significantly reduce runtime, especially for large datasets and many imputations (`m`), but does not pay off for small datasets or few imputations.
+- Parallel execution is implemented only in the `mice()` function, and does not affect the `mice.impute.*()` functions.
+
+- To activate parallel execution:
+
+```r
+library(mice)
+imp <- mice(data, parallel = TRUE)
+```
+
+- The default is `parallel = FALSE` for backward compatibility.
+- The argument `n.core` specifies the number of CPU cores to use. If `n.core` is not specified (the default), the number of cores actually used is the minimum of (number of available cores - 1) and the number of imputations (`m`).
+- `printFlag = TRUE` prints the iteration and imputation number only in sequential mode; parallel mode reports timing per iteration.
+- Note: `mice()` will automatically select a parallel backend (default is `multisession`). To override, users may manually call `future::plan(...)` before running `mice()`.
+- The `future` and `future.apply` packages must be installed to run parallel imputation. If not installed, `mice()` will throw an error and suggest installing the packages.
+- The wrappers `parlmice()` and `futuremice()` are still functional, but now issue a deprecation warning. Users are encouraged to use the new `parallel` argument in `mice()` instead.
+
 # mice 3.17.3
 
 * Allow for negative adjusted R2 in `pool.r.squared()` (#700)

diff --git a/R/cbind.R b/R/cbind.R
@@ -18,8 +18,6 @@ cbind.mids <- function(x, y = NULL, ...) {
     y <- cbind.data.frame(y, dots)
   }
 
-  # Call is a vector, with first argument the mice statement
-  # and second argument the call to cbind.mids.
   call <- c(x$call, call)
 
   if (nrow(y) != nrow(x$data)) {
@@ -32,13 +30,9 @@ cbind.mids <- function(x, y = NULL, ...) {
   varnames <- make.unique(colnames(data))
   colnames(data) <- varnames
 
-  # where argument
   where <- cbind(x$where, matrix(FALSE, nrow = nrow(x$where), ncol = ncol(y)))
   colnames(where) <- varnames
 
-  # blocks: no renaming needed because all block definition will
-  # refer to varnames[1:ncol(x$data)] only, and are hence unique
-  # but we do need to rename duplicate block names
   yblocks <- vector("list", length = ncol(y))
   blocks <- c(x$blocks, yblocks)
   xynames <- c(names(x$blocks), colnames(y))
@@ -50,31 +44,28 @@ cbind.mids <- function(x, y = NULL, ...) {
 
   m <- x$m
 
-  # count the number of missing data in y
   nmis <- c(x$nmis, colSums(is.na(y)))
   names(nmis) <- varnames
 
-  # imp: original data of y will be copied into the multiple imputed dataset,
-  # including the missing values of y.
   r <- (!is.na(y))
   f <- function(j) {
-    m <- matrix(NA,
-                nrow = sum(!r[, j]),
-                ncol = x$m,
-                dimnames = list(row.names(y)[!r[, j]], seq_len(m))
+    mtx <- matrix(NA,
+                  nrow = sum(!r[, j]),
+                  ncol = x$m,
+                  dimnames = list(row.names(y)[!r[, j]], seq_len(m))
     )
-    as.data.frame(m)
+    as.data.frame(mtx)
   }
-  imp <- lapply(seq_len(ncol(y)), f)
-  imp <- c(x$imp, imp)
+  imp_y <- lapply(seq_len(ncol(y)), f)
+
+  imp <- vector("list", length(varnames))
   names(imp) <- varnames
+  imp[names(x$imp)] <- x$imp
+  imp[names(imp_y)] <- imp_y
 
-  # The imputation method for (columns in) y will be set to ''.
   method <- c(x$method, rep.int("", ncol(y)))
   names(method) <- blocknames
 
-  # The variable(s) in y are included in the predictorMatrix.
-  # y is not used as predictor as well as not imputed.
   predictorMatrix <- rbind(
     x$predictorMatrix,
     matrix(0,
@@ -99,8 +90,6 @@ cbind.mids <- function(x, y = NULL, ...) {
   blots <- x$blots
   ignore <- x$ignore
 
-  # seed, lastSeedValue, number of iterations, chainMean and chainVar
-  # is taken as in mids object x.
   seed <- x$seed
   lastSeedValue <- x$lastSeedValue
   iteration <- x$iteration
@@ -109,7 +98,6 @@ cbind.mids <- function(x, y = NULL, ...) {
 
   loggedEvents <- x$loggedEvents
 
-  ## save, and return
   midsobj <- mids(
     data = data,
     imp = imp,

diff --git a/R/complete.R b/R/complete.R
@@ -150,17 +150,16 @@ single.complete <- function(data, where, imp, ell) {
   if (is.null(where)) {
     where <- is.na(data)
   }
-  idx <- seq_len(ncol(data))[apply(where, 2, any)]
+  idx <- intersect(seq_len(ncol(data)), match(names(imp), colnames(data)))
   for (j in idx) {
-    if (is.null(imp[[j]])) {
-      data[where[, j], j] <- NA
+    varname <- colnames(data)[j]
+    if (is.null(imp[[varname]])) {
+      data[where[, varname], varname] <- NA
     } else {
-      if (sum(where[, j]) == nrow(imp[[j]])) {
-        # assume equal length
-        data[where[, j], j] <- imp[[j]][, ell]
+      if (sum(where[, varname]) == nrow(imp[[varname]])) {
+        data[where[, varname], varname] <- imp[[varname]][, ell]
       } else {
-        # index by rowname
-        data[as.numeric(rownames(imp[[j]])), j] <- imp[[j]][, ell]
+        data[as.numeric(rownames(imp[[varname]])), varname] <- imp[[varname]][, ell]
       }
     }
   }

diff --git a/R/edit.setup.R b/R/edit.setup.R
@@ -1,30 +1,24 @@
 mice.edit.setup <- function(data, setup,
-                       allow.na = FALSE,
-                       remove.constant = TRUE,
-                       remove.collinear = TRUE,
-                       remove_collinear = TRUE,
-                       ...) {
-  # legacy handling
+                            allow.na = FALSE,
+                            remove.constant = TRUE,
+                            remove.collinear = TRUE,
+                            remove_collinear = TRUE,
+                            ...,
+                            logenv = NULL) {
   if (!remove_collinear) remove.collinear <- FALSE
 
-  # edits the imputation model setup
-  # When it detec constant or collinear variables, write in loggedEvents
-  # and continues imputation with reduced model
-
   pred <- setup$predictorMatrix
   meth <- setup$method
   vis <- setup$visitSequence
   post <- setup$post
 
-  # FIXME: this function is not yet adapted to blocks
   if (ncol(pred) != nrow(pred) || length(meth) != nrow(pred) ||
-    ncol(data) != nrow(pred)) {
+      ncol(data) != nrow(pred)) {
     return(setup)
   }
 
   varnames <- colnames(data)
 
-  # remove constant variables but leave passive variables untouched
   for (j in seq_len(ncol(data))) {
     if (!is.passive(meth[j])) {
       d.j <- data[, j]
@@ -34,58 +28,50 @@ mice.edit.setup <- function(data, setup,
       } else {
         is.na(v) || v < 1000 * .Machine$double.eps
       }
-      didlog <- FALSE
+
       if (constant && any(pred[, j] != 0) && remove.constant) {
-        out <- varnames[j]
         pred[, j] <- 0
-        updateLog(out = out, meth = "constant")
-        didlog <- TRUE
+        record.event(out = varnames[j], meth = "constant", logenv = logenv)
       }
+
       if (constant && meth[j] != "" && remove.constant) {
-        out <- varnames[j]
         pred[j, ] <- 0
-        if (!didlog) {
-          updateLog(out = out, meth = "constant")
-        }
         meth[j] <- ""
         vis <- vis[vis != j]
         post[j] <- ""
+        record.event(out = varnames[j], meth = "constant", logenv = logenv)
       }
     }
   }
 
   ## remove collinear variables
   ispredictor <- apply(pred != 0, 2, any)
-  if (any(ispredictor)) {
-    droplist <- find.collinear(data[, ispredictor, drop = FALSE], ...)
+  droplist <- if (any(ispredictor)) {
+    find.collinear(data[, ispredictor, drop = FALSE], logenv = logenv, ...)
   } else {
-    droplist <- NULL
+    NULL
   }
+
   if (length(droplist) > 0) {
     for (k in seq_along(droplist)) {
       j <- which(varnames %in% droplist[k])
-      didlog <- FALSE
+
       if (any(pred[, j] != 0) && remove.collinear) {
-        # remove as predictor
-        out <- varnames[j]
         pred[, j] <- 0
-        updateLog(out = out, meth = "collinear")
-        didlog <- TRUE
+        record.event(out = varnames[j], meth = "collinear", logenv = logenv)
       }
+
       if (meth[j] != "" && remove.collinear) {
-        out <- varnames[j]
         pred[j, ] <- 0
-        if (!didlog) {
-          updateLog(out = out, meth = "collinear")
-        }
         meth[j] <- ""
         vis <- vis[vis != j]
         post[j] <- ""
+        record.event(out = varnames[j], meth = "collinear", logenv = logenv)
       }
     }
   }
 
-  if (all(pred == 0L) && didlog) {
+  if (all(pred == 0L)) {
     stop("`mice` detected constant and/or collinear variables. No predictors were left after their removal.")
   }
 

diff --git a/R/futuremice.R b/R/futuremice.R
@@ -1,5 +1,14 @@
 #' Wrapper function that runs MICE in parallel
 #'
+#' @description
+#' **Deprecated**: This function is deprecated as of `mice 3.18.0`. Please use
+#' \code{mice(..., parallel = TRUE)} instead, which integrates native support
+#' for parallel imputation via the \pkg{future} and \pkg{future.apply} frameworks.
+#'
+#' This wrapper is kept for backward compatibility and is based on the
+#' \pkg{furrr} package, using \code{future_map()} to distribute imputations
+#' across multiple R sessions. The output is combined via \code{\link{ibind}}.
+#'
 #' This is a wrapper function for \code{\link{mice}}, using multiple cores to
 #' execute \code{\link{mice}} in parallel. As a result, the imputation
 #' procedure can be sped up, which may be useful in general. By default,
@@ -46,7 +55,7 @@
 #' The default \code{multisession} resolves futures asynchronously (in parallel)
 #' in separate \code{R} sessions running in the background. See
 #' \code{\link[future]{plan}} for more information on future plans.
-#' @param packages A character vector with additional packages to be used in 
+#' @param packages A character vector with additional packages to be used in
 #' \code{mice} (e.g., for using external imputation functions).
 #' @param globals A character string with additional functions to be exported to
 #' each future (e.g., user-written imputation functions).
@@ -78,8 +87,14 @@
 #'
 #' @export
 futuremice <- function(data, m = 5, parallelseed = NA, n.core = NULL, seed = NA,
-                       use.logical = TRUE, future.plan = "multisession", 
+                       use.logical = TRUE, future.plan = "multisession",
                        packages = NULL, globals = NULL, ...) {
+  warning(
+    "'futuremice()' is deprecated as of mice 3.18.0. ",
+    "Please use 'mice(..., parallel = TRUE)' instead.",
+    call. = FALSE
+  )
+
   # check if packages available
   install.on.demand("parallelly", ...)
   install.on.demand("furrr", ...)
@@ -136,7 +151,7 @@ futuremice <- function(data, m = 5, parallelseed = NA, n.core = NULL, seed = NA,
     }
     parallelseed <- get(
       ".Random.seed",
-      envir = globalenv(), 
+      envir = globalenv(),
       mode = "integer",
       inherits = FALSE
     )
@@ -149,7 +164,7 @@ futuremice <- function(data, m = 5, parallelseed = NA, n.core = NULL, seed = NA,
 
   # begin future
   imps <- furrr::future_map(
-    n.imp.core, 
+    n.imp.core,
     function(x) {
       mice(data = data,
            m = x,

diff --git a/R/initialize.imp.R b/R/initialize.imp.R
@@ -3,13 +3,15 @@ initialize.imp <- function(data, m, ignore, where, blocks, visitSequence,
   imp <- vector("list", ncol(data))
   names(imp) <- names(data)
   r <- !is.na(data)
+
   for (h in visitSequence) {
     for (j in blocks[[h]]) {
       y <- data[, j]
       ry <- r[, j] & !ignore
       wy <- where[, j]
       imp[[j]] <- as.data.frame(matrix(NA, nrow = sum(wy), ncol = m))
       dimnames(imp[[j]]) <- list(row.names(data)[wy], 1:m)
+
       if (method[h] != "") {
         for (i in seq_len(m)) {
           if (nmis[j] < nrow(data) && is.null(data.init)) {
@@ -18,14 +20,29 @@ initialize.imp <- function(data, m, ignore, where, blocks, visitSequence,
             imp[[j]][, i] <- data.init[wy, j]
           } else {
             if (is.factor(y)) {
-              imp[[j]][, i] <- sample(levels(y), nrow(data), replace = TRUE)
+              imp[[j]][, i] <- sample(levels(y), sum(wy), replace = TRUE)
             } else {
-              imp[[j]][, i] <- rnorm(nrow(data))
+              imp[[j]][, i] <- rnorm(sum(wy))
             }
           }
         }
       }
     }
   }
+
+  # Ensure imp[[j]] exists for any j used in where or blocks
+  vars_needed <- union(colnames(where)[colSums(where) > 0], unique(unlist(blocks)))
+  for (j in vars_needed) {
+    if (is.null(imp[[j]])) {
+      if (j %in% colnames(where)) {
+        wy <- where[, j]
+      } else {
+        wy <- rep(FALSE, nrow(data))
+      }
+      imp[[j]] <- as.data.frame(matrix(NA, nrow = sum(wy), ncol = m))
+      dimnames(imp[[j]]) <- list(row.names(data)[wy], as.character(seq_len(m)))
+    }
+  }
+
   imp
 }