Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: mice
Type: Package
Version: 3.17.3
Version: 3.18.0.9000
Title: Multivariate Imputation by Chained Equations
Date: 2025-03-28
Authors@R: c(person("Stef", "van Buuren", role = c("aut","cre"),
Expand Down Expand Up @@ -63,6 +63,7 @@ Imports:
Suggests:
broom.mixed,
future,
future.apply,
furrr,
haven,
knitr,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ export(pool.syn)
export(pool.table)
export(quickpred)
export(rbind)
export(record.event)
export(squeeze)
export(stripplot)
export(supports.transparent)
Expand Down
22 changes: 22 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
# mice 3.18.0.9000

> **Experimental release**: Native support for parallel imputation.

- The `mice()` function now supports parallel execution of imputations via the new `parallel = TRUE` argument. When enabled, instead of sequentially calculating `m` imputations at a given iteration, the `m` chains are distributed across available CPU cores using the `future` and `future.apply` frameworks.
- Parallel imputation may significantly reduce runtime, especially for large datasets and many imputations (`m`), but does not pay off for small datasets or few imputations.
- Parallel execution is implemented only in the `mice()` function, and does not affect the `mice.impute.*()` functions.

- To activate parallel execution:

```
library(mice)
imp <- mice(data, parallel = TRUE)
```

- The default is `parallel = FALSE` for backward compatibility.
- The argument `n.core` specifies the number of CPU cores to use. If `n.core` is not specified (the default), the number of cores actually used is `min(number of available cores - 1, m)`, where `m` is the number of imputations.
- `printFlag = TRUE` prints iteration and imputation number only in sequential mode; parallel mode reports timing per iteration.
- Note: `mice()` automatically selects a parallel backend (`multisession` by default). To override it, call `future::plan(...)` before running `mice()`.
- The `future` and `future.apply` packages must be installed to run parallel imputation. If not installed, `mice()` will throw an error and suggest installing the packages.
- The wrappers `parlmice()` and `futuremice()` remain functional, but now issue a deprecation warning. Users are encouraged to use the new `parallel` argument in `mice()` instead.

# mice 3.17.3

* Allow for negative adjusted R2 in `pool.r.squared()` (#700)
Expand Down
32 changes: 10 additions & 22 deletions R/cbind.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ cbind.mids <- function(x, y = NULL, ...) {
y <- cbind.data.frame(y, dots)
}

# Call is a vector, with first argument the mice statement
# and second argument the call to cbind.mids.
call <- c(x$call, call)

if (nrow(y) != nrow(x$data)) {
Expand All @@ -32,13 +30,9 @@ cbind.mids <- function(x, y = NULL, ...) {
varnames <- make.unique(colnames(data))
colnames(data) <- varnames

# where argument
where <- cbind(x$where, matrix(FALSE, nrow = nrow(x$where), ncol = ncol(y)))
colnames(where) <- varnames

# blocks: no renaming needed because all block definition will
# refer to varnames[1:ncol(x$data)] only, and are hence unique
# but we do need to rename duplicate block names
yblocks <- vector("list", length = ncol(y))
blocks <- c(x$blocks, yblocks)
xynames <- c(names(x$blocks), colnames(y))
Expand All @@ -50,31 +44,28 @@ cbind.mids <- function(x, y = NULL, ...) {

m <- x$m

# count the number of missing data in y
nmis <- c(x$nmis, colSums(is.na(y)))
names(nmis) <- varnames

# imp: original data of y will be copied into the multiple imputed dataset,
# including the missing values of y.
r <- (!is.na(y))
f <- function(j) {
m <- matrix(NA,
nrow = sum(!r[, j]),
ncol = x$m,
dimnames = list(row.names(y)[!r[, j]], seq_len(m))
mtx <- matrix(NA,
nrow = sum(!r[, j]),
ncol = x$m,
dimnames = list(row.names(y)[!r[, j]], seq_len(m))
)
as.data.frame(m)
as.data.frame(mtx)
}
imp <- lapply(seq_len(ncol(y)), f)
imp <- c(x$imp, imp)
imp_y <- lapply(seq_len(ncol(y)), f)

imp <- vector("list", length(varnames))
names(imp) <- varnames
imp[names(x$imp)] <- x$imp
imp[names(imp_y)] <- imp_y

# The imputation method for (columns in) y will be set to ''.
method <- c(x$method, rep.int("", ncol(y)))
names(method) <- blocknames

# The variable(s) in y are included in the predictorMatrix.
# y is not used as predictor as well as not imputed.
predictorMatrix <- rbind(
x$predictorMatrix,
matrix(0,
Expand All @@ -99,8 +90,6 @@ cbind.mids <- function(x, y = NULL, ...) {
blots <- x$blots
ignore <- x$ignore

# seed, lastSeedValue, number of iterations, chainMean and chainVar
# is taken as in mids object x.
seed <- x$seed
lastSeedValue <- x$lastSeedValue
iteration <- x$iteration
Expand All @@ -109,7 +98,6 @@ cbind.mids <- function(x, y = NULL, ...) {

loggedEvents <- x$loggedEvents

## save, and return
midsobj <- mids(
data = data,
imp = imp,
Expand Down
15 changes: 7 additions & 8 deletions R/complete.R
Original file line number Diff line number Diff line change
Expand Up @@ -150,17 +150,16 @@ single.complete <- function(data, where, imp, ell) {
if (is.null(where)) {
where <- is.na(data)
}
idx <- seq_len(ncol(data))[apply(where, 2, any)]
idx <- intersect(seq_len(ncol(data)), match(names(imp), colnames(data)))
for (j in idx) {
if (is.null(imp[[j]])) {
data[where[, j], j] <- NA
varname <- colnames(data)[j]
if (is.null(imp[[varname]])) {
data[where[, varname], varname] <- NA
} else {
if (sum(where[, j]) == nrow(imp[[j]])) {
# assume equal length
data[where[, j], j] <- imp[[j]][, ell]
if (sum(where[, varname]) == nrow(imp[[varname]])) {
data[where[, varname], varname] <- imp[[varname]][, ell]
} else {
# index by rowname
data[as.numeric(rownames(imp[[j]])), j] <- imp[[j]][, ell]
data[as.numeric(rownames(imp[[varname]])), varname] <- imp[[varname]][, ell]
}
}
}
Expand Down
54 changes: 20 additions & 34 deletions R/edit.setup.R
Original file line number Diff line number Diff line change
@@ -1,30 +1,24 @@
mice.edit.setup <- function(data, setup,
allow.na = FALSE,
remove.constant = TRUE,
remove.collinear = TRUE,
remove_collinear = TRUE,
...) {
# legacy handling
allow.na = FALSE,
remove.constant = TRUE,
remove.collinear = TRUE,
remove_collinear = TRUE,
...,
logenv = NULL) {
if (!remove_collinear) remove.collinear <- FALSE

# edits the imputation model setup
# When it detects constant or collinear variables, it writes to loggedEvents
# and continues imputation with reduced model

pred <- setup$predictorMatrix
meth <- setup$method
vis <- setup$visitSequence
post <- setup$post

# FIXME: this function is not yet adapted to blocks
if (ncol(pred) != nrow(pred) || length(meth) != nrow(pred) ||
ncol(data) != nrow(pred)) {
ncol(data) != nrow(pred)) {
return(setup)
}

varnames <- colnames(data)

# remove constant variables but leave passive variables untouched
for (j in seq_len(ncol(data))) {
if (!is.passive(meth[j])) {
d.j <- data[, j]
Expand All @@ -34,58 +28,50 @@ mice.edit.setup <- function(data, setup,
} else {
is.na(v) || v < 1000 * .Machine$double.eps
}
didlog <- FALSE

if (constant && any(pred[, j] != 0) && remove.constant) {
out <- varnames[j]
pred[, j] <- 0
updateLog(out = out, meth = "constant")
didlog <- TRUE
record.event(out = varnames[j], meth = "constant", logenv = logenv)
}

if (constant && meth[j] != "" && remove.constant) {
out <- varnames[j]
pred[j, ] <- 0
if (!didlog) {
updateLog(out = out, meth = "constant")
}
meth[j] <- ""
vis <- vis[vis != j]
post[j] <- ""
record.event(out = varnames[j], meth = "constant", logenv = logenv)
}
}
}

## remove collinear variables
ispredictor <- apply(pred != 0, 2, any)
if (any(ispredictor)) {
droplist <- find.collinear(data[, ispredictor, drop = FALSE], ...)
droplist <- if (any(ispredictor)) {
find.collinear(data[, ispredictor, drop = FALSE], logenv = logenv, ...)
} else {
droplist <- NULL
NULL
}

if (length(droplist) > 0) {
for (k in seq_along(droplist)) {
j <- which(varnames %in% droplist[k])
didlog <- FALSE

if (any(pred[, j] != 0) && remove.collinear) {
# remove as predictor
out <- varnames[j]
pred[, j] <- 0
updateLog(out = out, meth = "collinear")
didlog <- TRUE
record.event(out = varnames[j], meth = "collinear", logenv = logenv)
}

if (meth[j] != "" && remove.collinear) {
out <- varnames[j]
pred[j, ] <- 0
if (!didlog) {
updateLog(out = out, meth = "collinear")
}
meth[j] <- ""
vis <- vis[vis != j]
post[j] <- ""
record.event(out = varnames[j], meth = "collinear", logenv = logenv)
}
}
}

if (all(pred == 0L) && didlog) {
if (all(pred == 0L)) {
stop("`mice` detected constant and/or collinear variables. No predictors were left after their removal.")
}

Expand Down
23 changes: 19 additions & 4 deletions R/futuremice.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
#' Wrapper function that runs MICE in parallel
#'
#' @description
#' **Deprecated**: This function is deprecated as of `mice 3.18.0`. Please use
#' \code{mice(..., parallel = TRUE)} instead, which integrates native support
#' for parallel imputation via the \pkg{future} and \pkg{future.apply} frameworks.
#'
#' This wrapper is kept for backward compatibility. It is based on the
#' \pkg{furrr} package and uses \code{future_map()} to distribute imputations
#' across multiple R sessions. The output is combined via \code{\link{ibind}}.
#'
#' This is a wrapper function for \code{\link{mice}}, using multiple cores to
#' execute \code{\link{mice}} in parallel. As a result, the imputation
#' procedure can be sped up, which may be useful in general. By default,
Expand Down Expand Up @@ -46,7 +55,7 @@
#' The default \code{multisession} resolves futures asynchronously (in parallel)
#' in separate \code{R} sessions running in the background. See
#' \code{\link[future]{plan}} for more information on future plans.
#' @param packages A character vector with additional packages to be used in
#' @param packages A character vector with additional packages to be used in
#' \code{mice} (e.g., for using external imputation functions).
#' @param globals A character string with additional functions to be exported to
#' each future (e.g., user-written imputation functions).
Expand Down Expand Up @@ -78,8 +87,14 @@
#'
#' @export
futuremice <- function(data, m = 5, parallelseed = NA, n.core = NULL, seed = NA,
use.logical = TRUE, future.plan = "multisession",
use.logical = TRUE, future.plan = "multisession",
packages = NULL, globals = NULL, ...) {
warning(
"'futuremice()' is deprecated as of mice 3.18.0. ",
"Please use 'mice(..., parallel = TRUE)' instead.",
call. = FALSE
)

# check if packages available
install.on.demand("parallelly", ...)
install.on.demand("furrr", ...)
Expand Down Expand Up @@ -136,7 +151,7 @@ futuremice <- function(data, m = 5, parallelseed = NA, n.core = NULL, seed = NA,
}
parallelseed <- get(
".Random.seed",
envir = globalenv(),
envir = globalenv(),
mode = "integer",
inherits = FALSE
)
Expand All @@ -149,7 +164,7 @@ futuremice <- function(data, m = 5, parallelseed = NA, n.core = NULL, seed = NA,

# begin future
imps <- furrr::future_map(
n.imp.core,
n.imp.core,
function(x) {
mice(data = data,
m = x,
Expand Down
21 changes: 19 additions & 2 deletions R/initialize.imp.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@ initialize.imp <- function(data, m, ignore, where, blocks, visitSequence,
imp <- vector("list", ncol(data))
names(imp) <- names(data)
r <- !is.na(data)

for (h in visitSequence) {
for (j in blocks[[h]]) {
y <- data[, j]
ry <- r[, j] & !ignore
wy <- where[, j]
imp[[j]] <- as.data.frame(matrix(NA, nrow = sum(wy), ncol = m))
dimnames(imp[[j]]) <- list(row.names(data)[wy], 1:m)

if (method[h] != "") {
for (i in seq_len(m)) {
if (nmis[j] < nrow(data) && is.null(data.init)) {
Expand All @@ -18,14 +20,29 @@ initialize.imp <- function(data, m, ignore, where, blocks, visitSequence,
imp[[j]][, i] <- data.init[wy, j]
} else {
if (is.factor(y)) {
imp[[j]][, i] <- sample(levels(y), nrow(data), replace = TRUE)
imp[[j]][, i] <- sample(levels(y), sum(wy), replace = TRUE)
} else {
imp[[j]][, i] <- rnorm(nrow(data))
imp[[j]][, i] <- rnorm(sum(wy))
}
}
}
}
}
}

# Ensure imp[[j]] exists for any j used in where or blocks
vars_needed <- union(colnames(where)[colSums(where) > 0], unique(unlist(blocks)))
for (j in vars_needed) {
if (is.null(imp[[j]])) {
if (j %in% colnames(where)) {
wy <- where[, j]
} else {
wy <- rep(FALSE, nrow(data))
}
imp[[j]] <- as.data.frame(matrix(NA, nrow = sum(wy), ncol = m))
dimnames(imp[[j]]) <- list(row.names(data)[wy], as.character(seq_len(m)))
}
}

imp
}
Loading