scripts/algorithms/lmDS_.daph

#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Modifications 2023 The DAPHNE Consortium.
#
#-------------------------------------------------------------

# This script has been manually translated from Apache SystemDS.

# The lmDC function solves linear regression using the direct solve method
#
# INPUT:
# --------------------------------------------------------------------------------------
# X        Matrix of feature vectors.
# y        1-column matrix of response values.
# icpt     Intercept presence, shifting and rescaling the columns of X
# reg      Regularization constant (lambda) for L2-regularization. set to nonzero
#          for highly dependant/sparse/numerous features
# verbose  If TRUE print messages are activated
# --------------------------------------------------------------------------------------
#
# OUTPUT:
# ---------------------------------------------------------------------------------------------
# B     The model fit
# ---------------------------------------------------------------------------------------------

// TODO Support optional parameters with defaults (see #548).
def lmDS(X:matrix<f64>, y:matrix<f64>, icpt:si64 /*= 0*/, reg:f64 /*= 1e-7*/,
 verbose:bool /*= true*/) -> matrix<f64> {
  intercept_status = icpt;
  regularization = reg;

  n = nrow (X);
  m = ncol (X);
  ones_n = fill (1.0, n, 1);
  zero_cell = [0.0];

  # Introduce the intercept, shift and rescale the columns of X if needed

  m_ext = m;
  if (intercept_status == 1 || intercept_status == 2)  # add the intercept column
  {
    X = cbind (X, ones_n);
    m_ext = ncol (X);
  }

  scale_lambda = fill (1.0, m_ext, 1);
  if (intercept_status == 1 || intercept_status == 2)
  {
    scale_lambda [m_ext - 1, 0] = [0.0];
  }

  scale_X = [0.0]; # TODO this should not be necessary
  shift_X = [0.0]; # TODO this should not be necessary
  if (intercept_status == 2)  # scale-&-shift X columns to mean 0, variance 1
  {                           # Important assumption: X [, m_ext - 1] = ones_n
    avg_X_cols = t(sum(X, 1)) / n;
    var_X_cols = (t(sum (X ^ 2.0, 1)) - n * (avg_X_cols ^ 2.0)) / (n - 1);
    is_unsafe = (var_X_cols <= 0);
    scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
    scale_X [m_ext - 1, 0] = [1.0];
    # TODO unary minus
    shift_X = (0 - avg_X_cols) * scale_X;
    shift_X [m_ext - 1, 0] = [0.0];
  } else {
    scale_X = fill (1.0, m_ext, 1);
    shift_X = fill (0.0, m_ext, 1);
  }

  # Henceforth, if intercept_status == 2, we use "X @ (SHIFT/SCALE TRANSFORM)"
  # instead of "X".  However, in order to preserve the sparsity of X,
  # we apply the transform associatively to some other part of the expression
  # in which it occurs.  To avoid materializing a large matrix, we rewrite it:
  #
  # ssX_A  = (SHIFT/SCALE TRANSFORM) @ A    --- is rewritten as:
  # ssX_A  = diagMatrix (scale_X) @ A;
  # ssX_A [m_ext - 1, ] = ssX_A [m_ext - 1, ] + t(shift_X) @ A;
  #
  # tssX_A = t(SHIFT/SCALE TRANSFORM) @ A   --- is rewritten as:
  # tssX_A = diagMatrix (scale_X) @ A + shift_X @ A [m_ext - 1, ];

  lambda = scale_lambda * regularization;
  # BEGIN THE DIRECT SOLVE ALGORITHM (EXTERNAL CALL)
  A = t(X) @ X;
  b = t(X) @ y;
  if (intercept_status == 2) {
    A = t(diagMatrix (scale_X) @ A + shift_X @ A [m_ext - 1, ]);
    A =   diagMatrix (scale_X) @ A + shift_X @ A [m_ext - 1, ];
    b =   diagMatrix (scale_X) @ b + shift_X @ b [m_ext - 1, ];
  }
  A = A + diagMatrix (lambda);

  if (verbose)
	  print ("Calling the Direct Solver...");

  beta_unscaled = solve (A, b);

  # END THE DIRECT SOLVE ALGORITHM
  beta = [0.0]; # TODO this should not be necessary
  if (intercept_status == 2) {
      beta = scale_X * beta_unscaled;
      beta [m_ext - 1, ] = beta [m_ext - 1, ] + t(shift_X) @ beta_unscaled;
  } else {
      beta = beta_unscaled;
  }
  
  if (verbose) { 
	  print ("Computing the statistics...");
	  avg_tot = sum (y) / n;
	  ss_tot = sum (y ^ 2);
	  ss_avg_tot = ss_tot - n * avg_tot ^ 2;
	  var_tot = ss_avg_tot / (n - 1);
	  y_residual = y - X @ beta;
	  avg_res = sum (y_residual) / n;
	  ss_res = sum (y_residual ^ 2);
	  ss_avg_res = ss_res - n * avg_res ^ 2;
	
	  R2 = 1 - ss_res / ss_avg_tot;
	  dispersion = (n > m_ext) ? (ss_res / (n - m_ext)) : nan;
	  adjusted_R2 = (n > m_ext) ? (1 - dispersion / (ss_avg_tot / (n - 1))) : nan;
	
	  R2_nobias = 1 - ss_avg_res / ss_avg_tot;
	  deg_freedom = n - m - 1;
    var_res = 0.0; # TODO this should not be necessary
    adjusted_R2_nobias = 0.0; # TODO this should not be necessary
	  if (deg_freedom > 0) {
	    var_res = ss_avg_res / deg_freedom;
	    adjusted_R2_nobias = 1 - var_res / (ss_avg_tot / (n - 1));
	  } else {
	    var_res = nan;
	    adjusted_R2_nobias = nan;
	    print ("Warning: zero or negative number of degrees of freedom.");
	  }
	
	  R2_vs_0 = 1 - ss_res / ss_tot;
	  adjusted_R2_vs_0 = (n > m) ? (1 - (ss_res / (n - m)) / (ss_tot / n)) : nan;

    print ("AVG_TOT_Y, " + avg_tot +                 # Average of the response value Y
      "\nSTDEV_TOT_Y, " + sqrt (var_tot) +           # Standard Deviation of the response value Y
      "\nAVG_RES_Y, " + avg_res +                    # Average of the residual Y - pred(Y|X), i.e. residual bias
      "\nSTDEV_RES_Y, " + sqrt (var_res) +           # Standard Deviation of the residual Y - pred(Y|X)
      "\nDISPERSION, " + dispersion +                # GLM-style dispersion, i.e. residual sum of squares / # d.f.
      "\nR2, " + R2 +                                # R^2 of residual with bias included vs. total average
      "\nADJUSTED_R2, " + adjusted_R2 +              # Adjusted R^2 of residual with bias included vs. total average
      "\nR2_NOBIAS, " + R2_nobias +                  # R^2 of residual with bias subtracted vs. total average<Paste>
      "\nADJUSTED_R2_NOBIAS, " + adjusted_R2_nobias);  # Adjusted R^2 of residual with bias subtracted vs. total average
    if (intercept_status == 0) {
      print ("R2_VS_0, " + R2_vs_0 +               #  R^2 of residual with bias included vs. zero constant
        "\nADJUSTED_R2_VS_0, " + adjusted_R2_vs_0);  #  Adjusted R^2 of residual with bias included vs. zero constant
    }
  }

  B = beta;
  return B;
}