Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: mice
Type: Package
Version: 3.18.0
Version: 3.18.0.9000
Title: Multivariate Imputation by Chained Equations
Date: 2025-05-27
Authors@R: c(person("Stef", "van Buuren", role = c("aut","cre"),
Expand Down Expand Up @@ -63,6 +63,7 @@ Imports:
Suggests:
broom.mixed,
future,
future.apply,
furrr,
haven,
knitr,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ export(pool.syn)
export(pool.table)
export(quickpred)
export(rbind)
export(record.event)
export(squeeze)
export(stripplot)
export(supports.transparent)
Expand Down
22 changes: 22 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
# mice 3.18.0.9000

> **Experimental release**: Native support for parallel imputation.

- The `mice()` function now supports parallel execution of imputations via the new `parallel = TRUE` argument. When enabled, instead of sequentially calculating `m` imputations at a given iteration, the `m` chains are distributed across available CPU cores using the `future` and `future.apply` frameworks.
- Parallel imputation may significantly reduce runtime, especially for large datasets and many imputations (`m`), but does not pay off for small datasets or few imputations.
- Parallel execution is implemented only in the `mice()` function, and does not affect the `mice.impute.*()` functions.

- To activate parallel execution:

```
library(mice)
imp <- mice(data, parallel = TRUE)
```

- The default is `parallel = FALSE` for backward compatibility.
- The argument `n.core` specifies the number of CPU cores to use. If `n.core` is not specified (the default), the number of cores actually used is the minimum of (number of available cores - 1) and the number of imputations `m`.
- `printFlag = TRUE` prints iteration and imputation number only in sequential mode; parallel mode reports timing per iteration.
- Note: `mice()` will automatically select a parallel backend (default is `multisession`). To override, users may manually call `future::plan(...)` before running `mice()`.
- The `future` and `future.apply` packages must be installed to run parallel imputation. If not installed, `mice()` will throw an error and suggest installing the packages.
- The wrappers `parlmice()` and `futuremice()` are still functional, but now issue a deprecation warning. Users are encouraged to use the new `parallel` argument in `mice()` instead.

# mice 3.18.0

### Major changes
Expand Down
32 changes: 10 additions & 22 deletions R/cbind.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ cbind.mids <- function(x, y = NULL, ...) {
y <- cbind.data.frame(y, dots)
}

# Call is a vector, with first argument the mice statement
# and second argument the call to cbind.mids.
call <- c(x$call, call)

if (nrow(y) != nrow(x$data)) {
Expand All @@ -32,13 +30,9 @@ cbind.mids <- function(x, y = NULL, ...) {
varnames <- make.unique(colnames(data))
colnames(data) <- varnames

# where argument
where <- cbind(x$where, matrix(FALSE, nrow = nrow(x$where), ncol = ncol(y)))
colnames(where) <- varnames

# blocks: no renaming needed because all block definition will
# refer to varnames[1:ncol(x$data)] only, and are hence unique
# but we do need to rename duplicate block names
yblocks <- vector("list", length = ncol(y))
blocks <- c(x$blocks, yblocks)
xynames <- c(names(x$blocks), colnames(y))
Expand All @@ -50,31 +44,28 @@ cbind.mids <- function(x, y = NULL, ...) {

m <- x$m

# count the number of missing data in y
nmis <- c(x$nmis, colSums(is.na(y)))
names(nmis) <- varnames

# imp: original data of y will be copied into the multiple imputed dataset,
# including the missing values of y.
r <- (!is.na(y))
f <- function(j) {
m <- matrix(NA,
nrow = sum(!r[, j]),
ncol = x$m,
dimnames = list(row.names(y)[!r[, j]], seq_len(m))
mtx <- matrix(NA,
nrow = sum(!r[, j]),
ncol = x$m,
dimnames = list(row.names(y)[!r[, j]], seq_len(m))
)
as.data.frame(m)
as.data.frame(mtx)
}
imp <- lapply(seq_len(ncol(y)), f)
imp <- c(x$imp, imp)
imp_y <- lapply(seq_len(ncol(y)), f)

imp <- vector("list", length(varnames))
names(imp) <- varnames
imp[names(x$imp)] <- x$imp
imp[names(imp_y)] <- imp_y

# The imputation method for (columns in) y will be set to ''.
method <- c(x$method, rep.int("", ncol(y)))
names(method) <- blocknames

# The variable(s) in y are included in the predictorMatrix.
# y is not used as predictor as well as not imputed.
predictorMatrix <- rbind(
x$predictorMatrix,
matrix(0,
Expand All @@ -99,8 +90,6 @@ cbind.mids <- function(x, y = NULL, ...) {
blots <- x$blots
ignore <- x$ignore

# seed, lastSeedValue, number of iterations, chainMean and chainVar
# is taken as in mids object x.
seed <- x$seed
lastSeedValue <- x$lastSeedValue
iteration <- x$iteration
Expand All @@ -109,7 +98,6 @@ cbind.mids <- function(x, y = NULL, ...) {

loggedEvents <- x$loggedEvents

## save, and return
midsobj <- mids(
data = data,
imp = imp,
Expand Down
15 changes: 7 additions & 8 deletions R/complete.R
Original file line number Diff line number Diff line change
Expand Up @@ -150,17 +150,16 @@ single.complete <- function(data, where, imp, ell) {
if (is.null(where)) {
where <- is.na(data)
}
idx <- seq_len(ncol(data))[apply(where, 2, any)]
idx <- intersect(seq_len(ncol(data)), match(names(imp), colnames(data)))
for (j in idx) {
if (is.null(imp[[j]])) {
data[where[, j], j] <- NA
varname <- colnames(data)[j]
if (is.null(imp[[varname]])) {
data[where[, varname], varname] <- NA
} else {
if (sum(where[, j]) == nrow(imp[[j]])) {
# assume equal length
data[where[, j], j] <- imp[[j]][, ell]
if (sum(where[, varname]) == nrow(imp[[varname]])) {
data[where[, varname], varname] <- imp[[varname]][, ell]
} else {
# index by rowname
data[as.numeric(rownames(imp[[j]])), j] <- imp[[j]][, ell]
data[as.numeric(rownames(imp[[varname]])), varname] <- imp[[varname]][, ell]
}
}
}
Expand Down
43 changes: 15 additions & 28 deletions R/edit.setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,23 @@ mice.edit.setup <- function(data, setup, user.visitSequence,
remove.constant = TRUE,
remove.collinear = TRUE,
remove_collinear = TRUE,
logenv = NULL,
...) {
# legacy handling
if (!remove_collinear) remove.collinear <- FALSE

# edits the imputation model setup
# When it detec constant or collinear variables, write in loggedEvents
# and continues imputation with reduced model

pred <- setup$predictorMatrix
meth <- setup$method
vis <- setup$visitSequence
post <- setup$post

# FIXME: this function is not yet adapted to blocks
if (ncol(pred) != nrow(pred) || length(meth) != nrow(pred) ||
ncol(data) != nrow(pred)) {
ncol(data) != nrow(pred)) {
return(setup)
}

varnames <- colnames(data)

# remove constant variables but leave passive variables untouched
for (j in seq_len(ncol(data))) {
if (!is.passive(meth[j])) {
d.j <- data[, j]
Expand All @@ -34,58 +29,50 @@ mice.edit.setup <- function(data, setup, user.visitSequence,
} else {
is.na(v) || v < 1000 * .Machine$double.eps
}
didlog <- FALSE

if (constant && any(pred[, j] != 0) && remove.constant) {
out <- varnames[j]
pred[, j] <- 0
updateLog(out = out, meth = "constant")
didlog <- TRUE
record.event(out = varnames[j], meth = "constant", logenv = logenv)
}

if (constant && meth[j] != "" && remove.constant) {
out <- varnames[j]
pred[j, ] <- 0
if (!didlog) {
updateLog(out = out, meth = "constant")
}
meth[j] <- ""
vis <- vis[vis != j]
post[j] <- ""
record.event(out = varnames[j], meth = "constant", logenv = logenv)
}
}
}

## remove collinear variables
ispredictor <- apply(pred != 0, 2, any)
if (any(ispredictor)) {
droplist <- find.collinear(data[, ispredictor, drop = FALSE], ...)
droplist <- if (any(ispredictor)) {
find.collinear(data[, ispredictor, drop = FALSE], logenv = logenv, ...)
} else {
droplist <- NULL
NULL
}

if (length(droplist) > 0) {
for (k in seq_along(droplist)) {
j <- which(varnames %in% droplist[k])
didlog <- FALSE

if (any(pred[, j] != 0) && remove.collinear) {
# remove as predictor
out <- varnames[j]
pred[, j] <- 0
updateLog(out = out, meth = "collinear")
didlog <- TRUE
record.event(out = varnames[j], meth = "collinear", logenv = logenv)
}

if (meth[j] != "" && remove.collinear) {
out <- varnames[j]
pred[j, ] <- 0
if (!didlog) {
updateLog(out = out, meth = "collinear")
}
meth[j] <- ""
vis <- vis[vis != j]
post[j] <- ""
record.event(out = varnames[j], meth = "collinear", logenv = logenv)
}
}
}

if (all(pred == 0L) && didlog) {
if (all(pred == 0L)) {
stop("`mice` detected constant and/or collinear variables. No predictors were left after their removal.")
}

Expand Down
23 changes: 19 additions & 4 deletions R/futuremice.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
#' Wrapper function that runs MICE in parallel
#'
#' @description
#' **Deprecated**: This function is deprecated as of `mice 3.18.0`. Please use
#' \code{mice(..., parallel = TRUE)} instead, which integrates native support
#' for parallel imputation via the \pkg{future} and \pkg{future.apply} frameworks.
#'
#' This wrapper is kept for backward compatibility and was based on the
#' \pkg{furrr} package, using \code{future_map()} to distribute imputations
#' across multiple R sessions. The output is combined via \code{\link{ibind}}.
#'
#' This is a wrapper function for \code{\link{mice}}, using multiple cores to
#' execute \code{\link{mice}} in parallel. As a result, the imputation
#' procedure can be sped up, which may be useful in general. By default,
Expand Down Expand Up @@ -46,7 +55,7 @@
#' The default \code{multisession} resolves futures asynchronously (in parallel)
#' in separate \code{R} sessions running in the background. See
#' \code{\link[future]{plan}} for more information on future plans.
#' @param packages A character vector with additional packages to be used in
#' @param packages A character vector with additional packages to be used in
#' \code{mice} (e.g., for using external imputation functions).
#' @param globals A character string with additional functions to be exported to
#' each future (e.g., user-written imputation functions).
Expand Down Expand Up @@ -78,8 +87,14 @@
#'
#' @export
futuremice <- function(data, m = 5, parallelseed = NA, n.core = NULL, seed = NA,
use.logical = TRUE, future.plan = "multisession",
use.logical = TRUE, future.plan = "multisession",
packages = NULL, globals = NULL, ...) {
warning(
"'futuremice()' is deprecated as of mice 3.18.0. ",
"Please use 'mice(..., parallel = TRUE)' instead.",
call. = FALSE
)

# check if packages available
install.on.demand("parallelly", ...)
install.on.demand("furrr", ...)
Expand Down Expand Up @@ -136,7 +151,7 @@ futuremice <- function(data, m = 5, parallelseed = NA, n.core = NULL, seed = NA,
}
parallelseed <- get(
".Random.seed",
envir = globalenv(),
envir = globalenv(),
mode = "integer",
inherits = FALSE
)
Expand All @@ -149,7 +164,7 @@ futuremice <- function(data, m = 5, parallelseed = NA, n.core = NULL, seed = NA,

# begin future
imps <- furrr::future_map(
n.imp.core,
n.imp.core,
function(x) {
mice(data = data,
m = x,
Expand Down
21 changes: 19 additions & 2 deletions R/initialize.imp.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@ initialize.imp <- function(data, m, ignore, where, blocks, visitSequence,
imp <- vector("list", ncol(data))
names(imp) <- names(data)
r <- !is.na(data)

for (h in visitSequence) {
for (j in blocks[[h]]) {
y <- data[, j]
ry <- r[, j] & !ignore
wy <- where[, j]
imp[[j]] <- as.data.frame(matrix(NA, nrow = sum(wy), ncol = m))
dimnames(imp[[j]]) <- list(row.names(data)[wy], 1:m)

if (method[h] != "") {
for (i in seq_len(m)) {
if (nmis[j] < nrow(data) && is.null(data.init)) {
Expand All @@ -18,14 +20,29 @@ initialize.imp <- function(data, m, ignore, where, blocks, visitSequence,
imp[[j]][, i] <- data.init[wy, j]
} else {
if (is.factor(y)) {
imp[[j]][, i] <- sample(levels(y), nrow(data), replace = TRUE)
imp[[j]][, i] <- sample(levels(y), sum(wy), replace = TRUE)
} else {
imp[[j]][, i] <- rnorm(nrow(data))
imp[[j]][, i] <- rnorm(sum(wy))
}
}
}
}
}
}

# Ensure imp[[j]] exists for any j used in where or blocks
vars_needed <- union(colnames(where)[colSums(where) > 0], unique(unlist(blocks)))
for (j in vars_needed) {
if (is.null(imp[[j]])) {
if (j %in% colnames(where)) {
wy <- where[, j]
} else {
wy <- rep(FALSE, nrow(data))
}
imp[[j]] <- as.data.frame(matrix(NA, nrow = sum(wy), ncol = m))
dimnames(imp[[j]]) <- list(row.names(data)[wy], as.character(seq_len(m)))
}
}

imp
}
Loading