Description
I called shap_elimination.fit_compute(X, y), and it fails with the following error, raised at line 493 of feature_elimination.py:
ValueError: all the input array dimensions except for the concatenation axis must match exactly.
What should I do to resolve this? The call fails even though I pass X as a pandas DataFrame and y as a pandas Series.
Environment (please complete the following information):
- probatus version : 3.1.2
- python version : 3.9.0
- OS: macOS
To Reproduce
Put your code here
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
feature = pd.read_csv("Feature.csv", sep = "\t")
feature.index = feature.loc[:, "Unnamed: 0"].tolist()
feature = feature.drop(columns = ["Unnamed: 0"]).transpose()
feature.shape
target = pd.read_csv("target.csv", sep = "\t")
target.index = target.loc[:, "geo_accession"].tolist()
target = target.drop(columns = ["Unnamed: 0", "geo_accession"])
target.shape
from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold
X_train, X_test, y_train, y_test = train_test_split(feature, target, test_size = 0.3,
stratify = target, random_state = 42)
train_median = np.median(X_train, axis = 0)
train_std = np.std(X_train, axis = 0)
X_train_scale = (X_train - train_median) / train_std
X_test_scale = (X_test - train_median) / train_std
X_train = pd.DataFrame(X_train_scale, index = X_train.index, columns = X_train.columns)
X_test = pd.DataFrame(X_test_scale, index = X_test.index, columns = X_test.columns)
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
encoder.fit(y_train)
y_train_encoder = encoder.transform(y_train)
y_test_encoder = encoder.transform(y_test)
y_train = pd.Series(y_train_encoder, index = y_train.index)
y_test = pd.Series(y_test_encoder, index = y_test.index)
from sklearn.svm import SVC
from probatus.feature_elimination import ShapRFECV
plt.rcdefaults()
model = SVC(random_state = 42, probability = True, kernel = "linear")
cv = RepeatedStratifiedKFold(n_splits = 10, n_repeats = 10, random_state = 123)
shap_elimination = ShapRFECV(model, step = 0.1, cv = cv,
scoring = 'accuracy', n_jobs = -1, random_state = 42)
report = shap_elimination.fit_compute(X_train, y_train, check_additivity = False)
performance_plot = shap_elimination.plot()
Error traceback
ValueError Traceback (most recent call last)
Cell In[10], line 11
7 cv = RepeatedStratifiedKFold(n_splits = 10, n_repeats = 10, random_state = 123)
9 shap_elimination = ShapRFECV(model, step = 0.1, cv = cv,
10 scoring = 'accuracy', n_jobs = -1, random_state = 42)
---> 11 report = shap_elimination.fit_compute(X_train, y_train, check_additivity = False)
12 performance_plot = shap_elimination.plot()
File ~/miniforge3/envs/Project/lib/python3.9/site-packages/probatus/feature_elimination/feature_elimination.py:311, in ShapRFECV.fit_compute(self, X, y, sample_weight, columns_to_keep, column_names, shap_variance_penalty_factor, **shap_kwargs)
248 def fit_compute(
249 self,
250 X,
(...)
256 **shap_kwargs,
257 ):
258 """
259 Fits the object with the provided data.
260
(...)
308 DataFrame containing results of feature elimination from each iteration.
309 """
--> 311 self.fit(
312 X,
313 y,
314 sample_weight=sample_weight,
315 columns_to_keep=columns_to_keep,
316 column_names=column_names,
317 shap_variance_penalty_factor=shap_variance_penalty_factor,
318 **shap_kwargs,
319 )
320 return self.compute()
File ~/miniforge3/envs/Project/lib/python3.9/site-packages/probatus/feature_elimination/feature_elimination.py:493, in ShapRFECV.fit(self, X, y, sample_weight, columns_to_keep, column_names, groups, shap_variance_penalty_factor, **shap_kwargs)
491 shap_values = np.concatenate([current_result[0] for current_result in results_per_fold], axis=0)
492 else: # multi-class case
--> 493 shap_values = np.concatenate([current_result[0] for current_result in results_per_fold], axis=1)
495 scores_train = [current_result[1] for current_result in results_per_fold]
496 scores_val = [current_result[2] for current_result in results_per_fold]
File <array_function internals>:180, in concatenate(*args, **kwargs)
ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 194 and the array at index 9 has size 193