Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Improve Console Output Readability #14

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 28 additions & 25 deletions explainableai/core.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# explainableai/core.py
import colorama
from colorama import Fore, Style

# Initialize colorama
colorama.init(autoreset=True)

import pandas as pd
import numpy as np
Expand All @@ -21,7 +25,6 @@
from reportlab.platypus import PageBreak



class XAIWrapper:
def __init__(self):
self.model = None
Expand All @@ -47,11 +50,11 @@ def fit(self, models, X, y, feature_names=None):
self.feature_names = feature_names if feature_names is not None else X.columns.tolist()
self.is_classifier = all(hasattr(model, "predict_proba") for model in self.models.values())

print("Preprocessing data...")
print(f"{Fore.BLUE}Preprocessing data...{Style.RESET_ALL}")
self._preprocess_data()

print("Fitting models and analyzing...")
self.model_comparison_results = self._compare_models()
print(f"{Fore.BLUE}Fitting models and analyzing...{Style.RESET_ALL}")
self._analyze_models()

# Select the best model based on cv_score
best_model_name = max(self.model_comparison_results, key=lambda x: self.model_comparison_results[x]['cv_score'])
Expand Down Expand Up @@ -266,44 +269,44 @@ def _print_results(self, results):

@staticmethod
def perform_eda(df):
print("\nExploratory Data Analysis:")
print(f"Dataset shape: {df.shape}")
print("\nDataset info:")
print(f"{Fore.CYAN}Exploratory Data Analysis:{Style.RESET_ALL}")
print(f"{Fore.GREEN}Dataset shape: {df.shape}{Style.RESET_ALL}")
print(f"{Fore.CYAN}Dataset info:{Style.RESET_ALL}")
df.info()
print("\nSummary statistics:")
print(f"{Fore.CYAN}Summary statistics:{Style.RESET_ALL}")
print(df.describe())
print("\nMissing values:")
print(f"{Fore.CYAN}Missing values:{Style.RESET_ALL}")
print(df.isnull().sum())
print("\nData types:")
print(f"{Fore.CYAN}Data types:{Style.RESET_ALL}")
print(df.dtypes)
print("\nUnique values in each column:")
print(f"{Fore.CYAN}Unique values in each column:{Style.RESET_ALL}")
for col in df.columns:
print(f"{col}: {df[col].nunique()}")
print(f"{Fore.GREEN}{col}: {df[col].nunique()}{Style.RESET_ALL}")

# Additional EDA steps
print("\nCorrelation matrix:")
print(f"{Fore.CYAN}Correlation matrix:{Style.RESET_ALL}")
corr_matrix = df.select_dtypes(include=[np.number]).corr()
print(corr_matrix)

# Identify highly correlated features
high_corr = np.where(np.abs(corr_matrix) > 0.8)
high_corr_list = [(corr_matrix.index[x], corr_matrix.columns[y]) for x, y in zip(*high_corr) if x != y and x < y]
if high_corr_list:
print("\nHighly correlated features:")
print(f"{Fore.YELLOW}Highly correlated features:{Style.RESET_ALL}")
for feat1, feat2 in high_corr_list:
print(f"{feat1} - {feat2}: {corr_matrix.loc[feat1, feat2]:.2f}")
print(f"{Fore.GREEN}{feat1} - {feat2}: {corr_matrix.loc[feat1, feat2]:.2f}{Style.RESET_ALL}")

# Identify potential outliers
print("\nPotential outliers (values beyond 3 standard deviations):")
print(f"{Fore.CYAN}Potential outliers (values beyond 3 standard deviations):{Style.RESET_ALL}")
numeric_cols = df.select_dtypes(include=[np.number]).columns
for col in numeric_cols:
mean = df[col].mean()
std = df[col].std()
outliers = df[(df[col] < mean - 3*std) | (df[col] > mean + 3*std)]
outliers = df[(df[col] < mean - 3 * std) | (df[col] > mean + 3 * std)]
if not outliers.empty:
print(f"{col}: {len(outliers)} potential outliers")
print(f"{Fore.GREEN}{col}: {len(outliers)} potential outliers{Style.RESET_ALL}")

# Class distribution for the target variable (assuming last column is target)
target_col = df.columns[-1]
print(f"\nClass distribution for target variable '{target_col}':")
print(df[target_col].value_counts(normalize=True))
print(f"{Fore.CYAN}Class distribution for target variable '{target_col}':{Style.RESET_ALL}")
print(df[target_col].value_counts(normalize=True))
29 changes: 29 additions & 0 deletions improve_console_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import colorama
from colorama import Fore, Style

# Initialize colorama
colorama.init(autoreset=True) # Ensures colors are reset automatically after each print

def analyze_model(accuracy):
    """Print a colorized analysis banner followed by the model accuracy.

    Args:
        accuracy: Model accuracy as a float; rendered to three decimal places.
    """
    print(f"{Fore.BLUE}Analyzing model...{Style.RESET_ALL}")
    # Explicit reset keeps the green from leaking into later output even if
    # colorama was initialized without autoreset=True (consistent with core.py).
    print(f"{Fore.GREEN}Accuracy: {accuracy:.3f}{Style.RESET_ALL}")

def display_error(message):
    """Print *message* as a red, clearly-labeled error line.

    Args:
        message: Human-readable error text.
    """
    # Explicit reset keeps the red from leaking into later output even if
    # colorama was initialized without autoreset=True (consistent with core.py).
    print(f"{Fore.RED}Error: {message}{Style.RESET_ALL}")

def display_warning(message):
    """Print *message* as a yellow, clearly-labeled warning line.

    Args:
        message: Human-readable warning text.
    """
    # Explicit reset keeps the yellow from leaking into later output even if
    # colorama was initialized without autoreset=True (consistent with core.py).
    print(f"{Fore.YELLOW}Warning: {message}{Style.RESET_ALL}")

def main():
    """Demo entry point: show colorized analysis, error, and warning output."""
    sample_accuracy = 0.91234  # example value for the demonstration
    analyze_model(sample_accuracy)

    # Exercise the error- and warning-level helpers as well.
    display_error("An error occurred while loading the model.")
    display_warning("This model may take a long time to train.")


if __name__ == "__main__":
    main()
Loading