Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
247 changes: 103 additions & 144 deletions Cargo.lock

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ crate-type = ["staticlib"]

[dependencies]
ppca = { path = "./ppca" }
pyo3 = { version = "0.18.3", features = ["extension-module"] }
numpy = { version = "0.18.0", features = ["nalgebra"] }
pyo3 = { version = "0.20.2", features = ["extension-module"] }
numpy = { version = "0.20.0", features = ["nalgebra"] }
bincode = "1.3.3"
rayon = "1.7.0"
nalgebra = "0.32.2"
rayon = "1.8.1"
nalgebra = "0.32.3"
rand = "0.8.5"
rand_distr = "0.4.3"
69 changes: 59 additions & 10 deletions ppca/src/dataset.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use bit_vec::BitVec;
use nalgebra::DVector;
use nalgebra::{DMatrix, DVector};
use rayon::prelude::*;
use serde_derive::{Deserialize, Serialize};
use std::{ops::Index, sync::Arc};
Expand All @@ -11,6 +11,8 @@ use crate::utils::Mask;
pub struct MaskedSample {
pub(crate) data: DVector<f64>,
pub(crate) mask: Mask,
pub(crate) prior_mean: Option<DVector<f64>>,
pub(crate) prior_cov: Option<DMatrix<f64>>,
}

impl MaskedSample {
Expand All @@ -24,14 +26,31 @@ impl MaskedSample {
/// Creates a masked sample from data and a mask. The value is considered missing if its index
/// in the masked is set to `false`.
pub fn new(data: DVector<f64>, mask: Mask) -> MaskedSample {
MaskedSample { data, mask }
MaskedSample {
data,
mask,
prior_mean: None,
prior_cov: None,
}
}

/// Creates a sample without any masked values.
pub fn unmasked(data: DVector<f64>) -> MaskedSample {
MaskedSample {
mask: Mask::unmasked(data.len()),
data,
prior_cov: None,
prior_mean: None,
}
}

/// Associates a prior to this sample:
pub fn with_prior(self, prior_mean: DVector<f64>, prior_cov: DMatrix<f64>) -> Self {
MaskedSample {
mask: self.mask,
data: self.data,
prior_mean: Some(prior_mean),
prior_cov: Some(prior_cov),
}
}

Expand Down Expand Up @@ -96,13 +115,13 @@ pub struct Dataset {
/// The weights associated with each sample. Use this only if you are using the PPCA as a
/// component of a greater EM scheme (or otherwise know what you are doing). Else, let the
/// package set it automatically to 1.
pub weights: Vec<f64>,
pub weights: Arc<Vec<f64>>,
}

impl From<Vec<MaskedSample>> for Dataset {
fn from(value: Vec<MaskedSample>) -> Self {
Dataset {
weights: vec![1.0; value.len()],
weights: Arc::new(vec![1.0; value.len()]),
data: Arc::new(value),
}
}
Expand Down Expand Up @@ -152,7 +171,7 @@ impl Dataset {
/// Creates a new dataset from a set of masked samples.
pub fn new(data: Vec<MaskedSample>) -> Dataset {
Dataset {
weights: vec![1.0; data.len()],
weights: Arc::new(vec![1.0; data.len()]),
data: Arc::new(data),
}
}
Expand All @@ -162,19 +181,49 @@ impl Dataset {
assert_eq!(data.len(), weights.len());
Dataset {
data: Arc::new(data),
weights,
weights: Arc::new(weights),
}
}

/// Creates a new dataset with the same sample, but with different weights. This operation is
/// cheap, since it does not clone the dataset (it's protected by an `Arc`).
pub fn with_weights(&self, weights: Vec<f64>) -> Dataset {
pub fn with_weights(self, weights: Vec<f64>) -> Dataset {
Dataset {
data: self.data.clone(),
weights,
data: self.data,
weights: Arc::new(weights),
}
}

pub fn with_priors(
mut self,
prior_means: Vec<DVector<f64>>,
prior_covs: Vec<DMatrix<f64>>,
) -> Dataset {
assert_eq!(
self.data.len(),
prior_means.len(),
"Prior mean length mismatch"
);
assert_eq!(
self.data.len(),
prior_covs.len(),
"Prior cov length mismatch"
);

let mut new_data = vec![];
self.data
.par_iter()
.zip(prior_means)
.zip(prior_covs)
.map(|((sample, prior_mean), prior_cov)| {
sample.clone().with_prior(prior_mean, prior_cov)
})
.collect_into_vec(&mut new_data);

self.data = Arc::new(new_data);
self
}

/// The length of this dataset.
pub fn len(&self) -> usize {
self.data.len()
Expand All @@ -193,7 +242,7 @@ impl Dataset {
/// Lists the dimensions which as masked in __all__ samples in this dataset.
pub fn empty_dimensions(&self) -> Vec<usize> {
let Some(n_dimensions) = self.data.first().map(|sample| sample.mask().0.len()) else {
return vec![]
return vec![];
};
let new_mask = || BitVec::from_elem(n_dimensions, false);
let poormans_or = |mut this: BitVec, other: &BitVec| {
Expand Down
6 changes: 3 additions & 3 deletions ppca/src/mix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ impl PPCAMix {
dataset
.data
.par_iter()
.zip(&dataset.weights)
.zip(&*dataset.weights)
.map(|(sample, &weight)| weight * self.llk_one(sample))
.sum::<f64>()
}
Expand Down Expand Up @@ -291,7 +291,7 @@ impl PPCAMix {
// Log-posteriors for this particular model.
let log_posteriors: Vec<_> = log_posteriors
.par_iter()
.zip(&dataset.weights)
.zip(&*dataset.weights)
.filter(|&(_, &wi)| wi > 0.0)
.map(|(lp, &wi)| wi.ln() + lp[i])
.collect();
Expand All @@ -311,7 +311,7 @@ impl PPCAMix {
.collect();
let logsum_posteriors =
unnorm_posteriors.iter().copied().sum::<f64>().ln() + max_posterior;
let dataset = dataset.with_weights(unnorm_posteriors);
let dataset = dataset.clone().with_weights(unnorm_posteriors);

(model.iterate_with_prior(&dataset, prior), logsum_posteriors)
})
Expand Down
7 changes: 7 additions & 0 deletions ppca/src/output_covariance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,13 @@ impl<'a> OutputCovariance<'a> {
}
}

pub(crate) fn transform_state(&self, transform: &DMatrix<f64>) -> OutputCovariance<'static> {
OutputCovariance {
isotropic_noise: self.isotropic_noise,
transform: Cow::Owned(&*self.transform * transform),
}
}

pub(crate) fn quadratic_form(&self, x: &DVector<f64>) -> f64 {
let norm_squared = x.norm_squared();
let transpose_transformed = self.transform.transpose() * x;
Expand Down
63 changes: 43 additions & 20 deletions ppca/src/ppca_model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ impl PPCAModel {
dataset
.data
.par_iter()
.zip(&dataset.weights)
.zip(&*dataset.weights)
.map(|(sample, weight)| self.llk_one(sample) * weight)
.sum()
}
Expand Down Expand Up @@ -174,10 +174,7 @@ impl PPCAModel {
.collect::<BitVec>(),
);

MaskedSample {
data: mask.fillna(&(sampled_state + noise)),
mask,
}
MaskedSample::new(mask.fillna(&(sampled_state + noise)), mask)
}

/// Sample a full dataset from the PPCA model and masks each entry according to a
Expand All @@ -197,13 +194,39 @@ impl PPCAModel {
return self.uninferred();
}

let sub_sample = sample.mask.mask(&(sample.data_vector() - &self.0.mean));
let sub_covariance = self.0.output_covariance.masked(&sample.mask);
let mean = if let Some(prior_mean) = &sample.prior_mean {
&self.0.mean + &*self.0.output_covariance.transform * prior_mean
} else {
self.0.mean.clone()
};

InferredMasked {
model: self.clone(),
state: sub_covariance.estimator_transform() * sub_sample,
covariance: sub_covariance.estimator_covariance(),
let sub_sample = sample.mask.mask(&(sample.data_vector() - mean));

if let Some(prior_cov) = &sample.prior_cov {
let chol = prior_cov
.clone()
.cholesky()
.expect("Prior should be Cholesky-decomposable");
let l = chol.l();

let sub_covariance = self
.0
.output_covariance
.masked(&sample.mask)
.transform_state(&l);

InferredMasked {
model: self.clone(),
state: &l * sub_covariance.estimator_transform() * sub_sample,
covariance: &l * sub_covariance.estimator_covariance() * l.transpose(),
}
} else {
let sub_covariance = self.0.output_covariance.masked(&sample.mask);
InferredMasked {
model: self.clone(),
state: sub_covariance.estimator_transform() * sub_sample,
covariance: sub_covariance.estimator_covariance(),
}
}
}

Expand All @@ -229,7 +252,7 @@ impl PPCAModel {
dataset
.data
.par_iter()
.zip(&dataset.weights)
.zip(&*dataset.weights)
.map(|(sample, &weight)| (self.smooth_one(sample), weight))
.collect()
}
Expand All @@ -246,7 +269,7 @@ impl PPCAModel {
dataset
.data
.par_iter()
.zip(&dataset.weights)
.zip(&*dataset.weights)
.map(|(sample, &weight)| (self.extrapolate_one(sample), weight))
.collect()
}
Expand All @@ -272,7 +295,7 @@ impl PPCAModel {
let total_cross_moment = dataset
.data
.par_iter()
.zip(&dataset.weights)
.zip(&*dataset.weights)
.zip(&inferred)
.map(|((sample, &weight), inferred)| {
let centered_filled = sample.mask.fillna(&(sample.data_vector() - &self.0.mean));
Expand All @@ -288,7 +311,7 @@ impl PPCAModel {
let total_second_moment = dataset
.data
.iter()
.zip(&dataset.weights)
.zip(&*dataset.weights)
.zip(&inferred)
.filter(|((sample, _), _)| sample.mask.0[idx])
.map(|((_, &weight), inferred)| weight * inferred.second_moment())
Expand Down Expand Up @@ -319,7 +342,7 @@ impl PPCAModel {
let (square_error, deviations_square_sum, total_deviation, totals) = dataset
.data
.par_iter()
.zip(&dataset.weights)
.zip(&*dataset.weights)
.zip(&inferred)
.filter(|((sample, _), _)| !sample.is_empty())
.map(
Expand Down Expand Up @@ -664,9 +687,9 @@ mod test {
#[test]
fn test_llk() {
let model = toy_model();
dbg!(model.llk(&Dataset::new(vec![MaskedSample {
data: dvector![1.0, 2.0, 3.0],
mask: Mask(BitVec::from_elem(3, true)),
}])));
dbg!(model.llk(&Dataset::new(vec![MaskedSample::new(
dvector![1.0, 2.0, 3.0],
Mask(BitVec::from_elem(3, true))
)])));
}
}
Loading