-
Notifications
You must be signed in to change notification settings - Fork 66
/
Copy pathrandomForestPredict_.daph
110 lines (95 loc) · 4.1 KB
/
randomForestPredict_.daph
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Modifications 2024 The DAPHNE Consortium.
#
#-------------------------------------------------------------
# This script has been manually translated from Apache SystemDS (https://github.com/apache/systemds).
# Original file: scripts/builtin/randomForestPredict.dml @ 4c97c1c376727f552a80c60217648c37d76fcb4e.
# This script implements random forest prediction for recoded and binned
# categorical and numerical input features.
#
# INPUT:
# ------------------------------------------------------------------------------
# X Feature matrix in recoded/binned representation
# y Label matrix in recoded/binned representation,
# optional for accuracy evaluation
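# ctypes Row-Vector of column types [1 scale/ordinal, 2 categorical];
# must have ncol(X)+1 entries, the last one being the type of the label
# (2 selects majority voting, anything else selects averaging)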
# M Matrix M holding the learned trees (one tree per row),
# see randomForest() for the detailed tree representation.
# verbose Flag indicating verbose debug output
# ------------------------------------------------------------------------------
#
# OUTPUT:
# ------------------------------------------------------------------------------
# yhat Label vector of predictions
# ------------------------------------------------------------------------------
import "decisionTreePredict_.daph";
import "lmPredictStats_.daph";
# TODO Support optional parameters with defaults (see #548).
def randomForestPredict(X:matrix<f64>, y:matrix<f64> /*= fill(0.0,0,0)*/,
    ctypes:matrix<f64>, M:matrix<f64>, verbose:bool /*= false*/) -> matrix<f64>
{
  # Scores every row of X with each tree stored in M (one tree per row of M)
  # and aggregates the per-tree predictions into a single nrow(X) x 1 result:
  # majority vote if the label type is categorical, average otherwise.
  # If y has as many rows as X and verbose is set, summary statistics are
  # printed in addition.
  t1 = now();

  # The entry of ctypes right after the ncol(X) feature types (0-based column
  # ncol(X)) is the label type; 2 means categorical, i.e., classification.
  classify = as.scalar(ctypes[0,ncol(X)]) == 2;
  # Labels count as provided iff y has one row per row of X.
  yExists = (nrow(X)==nrow(y));

  if(verbose) {
    print("randomForestPredict: called for batch of "+nrow(X)+" rows, model of "
      +nrow(M)+" trees, and with labels-provided "+yExists+".");
  }

  # scoring of num_tree decision trees
  # Ytmp holds one row of predictions per tree and one column per row of X.
  Ytmp = fill(0.0, nrow(M), nrow(X));
  # TODO Support parfor-loops (see #515).
  for(i in 1:nrow(M)) {
    if( verbose ) {
      print("randomForestPredict: start scoring tree "+i+"/"+nrow(M)+".");
    }

    # step 1: sample features (consistent with training)
    # The first ncol(X) entries of row i-1 of M are used as a column filter on X.
    I2 = M[i - 1, 0:ncol(X)];
    Xi = X[[, t(I2)]];

    # step 2: score decision tree
    t2 = now();
    # Signature of the callee, for reference:
    #decisionTreePredict(X:matrix<f64>, y:matrix<f64> /*= fill(0.0,0,0)*/,
    #ctypes:matrix<f64>, M:matrix<f64>, strategy:str /*="TT"*/, verbose:bool /*= false*/)
    # DaphneDSL has no default arguments yet (see #548), so the optional label
    # matrix y must be passed explicitly; an empty matrix is its documented
    # default. Omitting it would shift all subsequent arguments by one position.
    ret = decisionTreePredict_.decisionTreePredict(Xi, fill(0.0, 0, 0), ctypes,
      M[i - 1,ncol(X):ncol(M)], "TT", false);
    Ytmp[i - 1,0:nrow(ret)] = t(ret);

    if( verbose ) {
      print("-- ["+i+"] scored decision tree in "+as.f64(now()-t2)/1e9+" seconds.");
    }
  }

  # label aggregation (majority voting / average)
  yhat = fill(0.0, nrow(X), 1);
  if( classify ) {
    # TODO Support parfor-loops (see #515).
    for(i in 1:nrow(X)) {
      # Tally the votes of all trees for row i-1 of X: labels are shifted by -1
      # for the 0-based ctable, the position of the largest count is taken,
      # and +1 shifts it back to a label.
      # NOTE(review): assumes labels are recoded as 1..k - confirm.
      yhat[i - 1,0] = as.f64(idxMax(t(ctable(Ytmp[,i - 1] - 1, fill(0.0, nrow(Ytmp), 1))), 0) + 1);
    }
  }
  else {
    # Mean prediction over all trees for each row of X.
    yhat = t(sum(Ytmp, 1)/nrow(M));
  }

  # summary statistics
  if( yExists && verbose ) {
    if( classify ) {
      print("Accuracy (%): " + (sum(yhat == y) / nrow(y) * 100));
    }
    else {
      lmPredictStats_.lmPredictStats(yhat, y, false, verbose);
    }
  }

  if(verbose) {
    print("randomForestPredict: scored batch of "+nrow(X)+" rows with "+nrow(M)+" trees in "+as.f64(now()-t1)/1e9+" seconds.");
  }

  return yhat;
}