Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion h2o-algos/src/main/java/hex/api/MakeGLMModelHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import water.api.schemas3.KeyV3;
import water.fvec.*;
import water.fvec.Vec.VectorGroup;
import water.util.ArrayUtils;

import java.util.Arrays;
import java.util.Map;
Expand Down Expand Up @@ -77,7 +78,7 @@ public GLMModelV3 make_unrestricted_model(int version, MakeUnrestrictedGLMModelV
DataInfo dinfo = model.dinfo();
dinfo.setPredictorTransform(TransformType.NONE);
m._output = new GLMOutput(model.dinfo(), model._output._names, model._output._column_types, model._output._domains,
model._output.coefficientNames(), model._output.beta(), model._output._binomial, model._output._multinomial,
model._output.coefficientNames(), model._output._multinomial || model._output._ordinal? ArrayUtils.flattenArray(model._output.get_global_beta_multinomial()) : model._output.beta(), model._output._binomial, model._output._multinomial,
model._output._ordinal, null);
ModelMetrics mt = model._output._training_metrics_unrestricted_model;
ModelMetrics mv = model._output._validation_metrics_unrestricted_model;
Expand Down
34 changes: 24 additions & 10 deletions h2o-algos/src/main/java/hex/glm/GLM.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import Jama.Matrix;
import hex.*;
import hex.genmodel.utils.DistributionFamily;
import hex.glm.GLMModel.GLMOutput;
import hex.glm.GLMModel.GLMParameters.Family;
import hex.glm.GLMModel.GLMParameters.Link;
Expand Down Expand Up @@ -3400,17 +3401,23 @@ private void scorePostProcessingControlVal(Frame train, long t1) {
_model.addScoringInfo(_parms, nclasses(), t2, _state._iter); // add to scoringInfo for early stopping

if (_parms._generate_scoring_history) { // update scoring history with deviance train and valid if available
double likelihoodContrVal;
double[] betaContrVal = _model._output.getControlValBeta(_state.expandBeta(_state.beta()).clone());
GLMResDevTask task = new GLMResDevTask(_job._key, _dinfo, _parms, betaContrVal).doAll(_dinfo._adaptedFrame);
double objectiveControlVal = _state.objective(betaContrVal, task._likelihood);

if(_model._output.isMultinomialClassifier()){
GLMResDevTaskMultinomial task = new GLMResDevTaskMultinomial(_job._key, _dinfo, betaContrVal, nclasses()).doAll(_dinfo._adaptedFrame);
likelihoodContrVal = task._likelihood;
} else {
GLMResDevTask task = new GLMResDevTask(_job._key, _dinfo, _parms, betaContrVal).doAll(_dinfo._adaptedFrame);
likelihoodContrVal = task._likelihood;
}
double objectiveControlVal = _state.objective(betaContrVal, likelihoodContrVal);
if ((mtrain != null) && (_valid != null)) {
_scoringHistory.addIterationScore(true, true, _state._iter, task._likelihood,
objectiveControlVal, _state.deviance(task._likelihood), ((GLMMetrics) _model._output._validation_metrics).residual_deviance(),
_scoringHistory.addIterationScore(true, true, _state._iter, likelihoodContrVal,
objectiveControlVal, _state.deviance(likelihoodContrVal), ((GLMMetrics) _model._output._validation_metrics).residual_deviance(),
mtrain._nobs, _model._output._validation_metrics._nobs, _state.lambda(), _state.alpha());
} else { // only doing training deviance
_scoringHistory.addIterationScore(true, false, _state._iter, task._likelihood,
objectiveControlVal, _state.deviance(task._likelihood), Double.NaN, mtrain._nobs, 1, _state.lambda(),
_scoringHistory.addIterationScore(true, false, _state._iter, likelihoodContrVal,
objectiveControlVal, _state.deviance(likelihoodContrVal), Double.NaN, mtrain._nobs, 1, _state.lambda(),
Comment thread
maurever marked this conversation as resolved.
_state.alpha());
}
_job.update(_workPerIteration, _state.toString());
Expand Down Expand Up @@ -4036,9 +4043,16 @@ protected void updateProgress(boolean canScore) {
if (_model._parms._control_variables != null){
_scoringHistoryUnrestrictedModel.addIterationScore(_state._iter, _state.likelihood(), _state.objective());
double[] betaContrVal = _model._output.getControlValBeta(_state.expandBeta(_state.beta()).clone());
GLMResDevTask task = new GLMResDevTask(_job._key,_dinfo,_parms, betaContrVal).doAll(_state._dinfo._adaptedFrame);
double objectiveControlVal = _state.objective(betaContrVal, task._likelihood);
_scoringHistory.addIterationScore(_state._iter, task._likelihood, objectiveControlVal);
double likelihoosContrVal;
if(_model._output.isMultinomialClassifier()){
GLMResDevTaskMultinomial task = new GLMResDevTaskMultinomial(_job._key, _dinfo, betaContrVal, nclasses()).doAll(_state._dinfo._adaptedFrame);
likelihoosContrVal = task._likelihood;
} else {
GLMResDevTask task = new GLMResDevTask(_job._key,_dinfo,_parms, betaContrVal).doAll(_state._dinfo._adaptedFrame);
likelihoosContrVal = task._likelihood;
}
double objectiveControlVal = _state.objective(betaContrVal, likelihoosContrVal);
_scoringHistory.addIterationScore(_state._iter, likelihoosContrVal, objectiveControlVal);
Comment thread
maurever marked this conversation as resolved.
Outdated
} else {
_scoringHistory.addIterationScore(_state._iter, _state.likelihood(), _state.objective());
}
Expand Down
42 changes: 35 additions & 7 deletions h2o-algos/src/main/java/hex/glm/GLMModel.java
Original file line number Diff line number Diff line change
Expand Up @@ -1646,6 +1646,28 @@ public double[] getControlValBeta(double[] beta){
}
return beta;
}

public double[][] getControlValBetaMultinomial(double[][] beta) {
if (_control_values_idxs_in_adapted_frame == null) {
mapControlVariables();
}
assert _control_values_idxs_in_adapted_frame != null;
for (int featureIdx : _control_values_idxs_in_adapted_frame) {
if (featureIdx < _dinfo._catOffsets.length - 1 && _column_types[featureIdx].equals("Enum")) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

&& _column_types[featureIdx].equals("Enum")

Is this part really necessary or is it just to be really really sure?

for (int i = _dinfo._catOffsets[featureIdx]; i < _dinfo._catOffsets[featureIdx + 1]; i++) {
for (int c = 0; c < beta.length; ++c) {
beta[c][i] = 0;
}
}
} else {
for (int c = 0; c < beta.length; ++c) {
featureIdx += _dinfo._numOffsets[0] - _dinfo._catOffsets.length + 1;
beta[c][featureIdx] = 0;
Comment thread
maurever marked this conversation as resolved.
Outdated
}
}
}
return beta;
}

public double[] stdErr() {
return calculateStdErrFromZValues(_zvalues, _global_beta);
Expand Down Expand Up @@ -1797,7 +1819,6 @@ public GLMOutput(DataInfo dinfo, String[] column_names, String[] column_types, S
mapControlVariables();
}


public GLMOutput() {
_isSupervised = true;
_nclasses = -1;
Expand Down Expand Up @@ -2020,9 +2041,13 @@ public VarImp calculateVarimp(boolean contrVal) {
float[] magnitudesSort = new float[len]; // stored sorted coefficient magnitudes
String[] namesSort = new String[len];

if (contrVal)
calculateVarimpBase(magnitudes, indices, getControlValBeta(getNormBeta()));
else if (_nclasses > 2)
if (contrVal) {
if(_nclasses > 2) {
calculateVarimpMultinomial(magnitudes, indices, getControlValBetaMultinomial(getNormBetaMultinomial()));
} else {
calculateVarimpBase(magnitudes, indices, getControlValBeta(getNormBeta()));
}
} else if (_nclasses > 2)
calculateVarimpMultinomial(magnitudes, indices, getNormBetaMultinomial());
else
calculateVarimpBase(magnitudes, indices, getNormBeta());
Expand Down Expand Up @@ -2147,9 +2172,13 @@ else if (_output.bestSubmodel().alpha_value == 1)
int icptInd = bm[0].length-1;
if (_parms._family == Family.ordinal) // only need one eta for all classes
classInd -= 1; // last class all zeros
double[][] bmcv = bm.clone();
if(_useControlVariables){
bmcv = _output.getControlValBetaMultinomial(bm);
}
for (int c = 0; c < classInd; ++c) {
double e = bm[c][icptInd]; // grab the intercept, replace the bm[0].length-1
double [] b = bm[c];
double e = bmcv[c][icptInd]; // grab the intercept, replace the bm[0].length-1
double [] b = bmcv[c];
for(int i = 0; i < _output._dinfo._cats; ++i) {
int l = _output._dinfo.getCategoricalId(i, data[i]);
if (l >= 0) e += b[l];
Expand Down Expand Up @@ -2203,7 +2232,6 @@ else if (_output.bestSubmodel().alpha_value == 1)
double[] bcv = b.clone();
if (this._useControlVariables)
bcv = _output.getControlValBeta(bcv); // make beta connected to control variables zero

for (int i = 0; i < _output._dinfo._cats && !Double.isNaN(eta); ++i) {
int l = _output._dinfo.getCategoricalId(i, data[i]);
if (l >= 0) eta += bcv[l];
Expand Down
9 changes: 6 additions & 3 deletions h2o-algos/src/main/java/hex/glm/GLMScore.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,11 @@ public GLMScore(Job j, GLMModel m, DataInfo dinfo, String[] domain, boolean comp
if(_m._parms._family == GLMModel.GLMParameters.Family.multinomial ||
_m._parms._family == GLMModel.GLMParameters.Family.ordinal){
_beta = null;
_beta_multinomial = m._output._global_beta_multinomial;
if(m._useControlVariables){
_beta_multinomial = m._output.getControlValBetaMultinomial(m._output._global_beta_multinomial);
} else {
_beta_multinomial = m._output._global_beta_multinomial;
}
_dinfo = dinfo;
} else {
double[] beta = m.beta();
Expand All @@ -67,7 +71,6 @@ public GLMScore(Job j, GLMModel m, DataInfo dinfo, String[] domain, boolean comp
beta2[l] = beta[beta.length - 1];
beta = beta2;
}

if (m._useControlVariables) {
double[] betaContVar = m.beta().clone();
betaContVar = m._output.getControlValBeta(betaContVar);
Expand Down Expand Up @@ -116,7 +119,7 @@ public GLMScore(Job j, GLMModel m, DataInfo dinfo, String[] domain, boolean comp
preds[_nclasses] = 1-previousCDF;
preds[0] = ArrayUtils.maxIndex(preds)-1;
} else if (_m._parms._family == GLMModel.GLMParameters.Family.multinomial) {
double[] eta = _eta;
double[] eta = _eta.clone();
final double[][] bm = _beta_multinomial;
double sumExp = 0;
double maxRow = 0;
Expand Down
Loading