Skip to content

TensorFlow can now be used with the software package #6

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions MachineLearningMCMC/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from config_reader import ConfigReader
from machine_learning.ml_factory import MLFactory
from machine_learning.fml_interface import FmlInterface
from machine_learning.scikit_interface import SciKitInterface
from file_handling.chain_handler import ChainHandler
40 changes: 4 additions & 36 deletions MachineLearningMCMC/__main__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
import yaml
import argparse

from file_handling.chain_handler import ChainHandler
from machine_learning.ml_factory import MLFactory
from config_reader import ConfigReader

if __name__=="__main__":

Expand All @@ -11,36 +8,7 @@

args = parser.parse_args()

with open(args.config, 'r') as c:
yaml_config = yaml.safe_load(c)

# Process MCMC chain
file_handler = ChainHandler(yaml_config["FileSettings"]["FileName"],
yaml_config["FileSettings"]["ChainName"],
yaml_config["FileSettings"]["Verbose"])

file_handler.ignore_plots(yaml_config["FileSettings"]["IgnoredParameters"])
file_handler.add_additional_plots(yaml_config["FileSettings"]["ParameterNames"])
file_handler.add_additional_plots(yaml_config["FileSettings"]["LabelName"], True)

file_handler.add_new_cuts(yaml_config["FileSettings"]["ParameterCuts"])

file_handler.convert_ttree_to_array()

factory = MLFactory(file_handler, yaml_config["FileSettings"]["LabelName"])
if yaml_config["FitterSettings"]["FitterPackage"].lower() == "scikit":
interface = factory.setup_scikit_model(yaml_config["FitterSettings"]["FitterName"],
**yaml_config["FitterSettings"]["FitterKwargs"])

else:
raise ValueError("Input not recognised!")

if yaml_config["FitterSettings"].get("AddFromExternalModel"):
external_model = yaml_config["FitterSettings"]["ExternalModel"]
interface.load_model(external_model)

interface.set_training_test_set(yaml_config["FitterSettings"]["TestSize"])
config_reader = ConfigReader(args.config)
config_reader()

interface.train_model()
interface.test_model()
interface.save_model(yaml_config["FileSettings"]["ModelOutputName"])

73 changes: 73 additions & 0 deletions MachineLearningMCMC/config_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import yaml

from file_handling.chain_handler import ChainHandler
from machine_learning.ml_factory import MLFactory
from machine_learning.fml_interface import FmlInterface


class ConfigReader:
    """Run the chain-processing and model-fitting workflow described by a YAML file.

    The configuration is read once in ``__init__``. Calling the instance runs
    ``setup_file_handler`` followed by ``setup_ml_interface``. The resulting
    objects are exposed through the ``chain_handler`` and ``ml_interface``
    properties, both of which are ``None`` until the matching setup step ran.
    """

    # Strictly unnecessary but nice conceptually
    _file_handler = None
    _interface = None

    def __init__(self, config: str):
        """Parse the YAML configuration stored at path ``config``."""
        # Explicit encoding so parsing does not depend on the platform locale.
        with open(config, 'r', encoding="utf-8") as c:
            self._yaml_config = yaml.safe_load(c)

    def setup_file_handler(self) -> None:
        """Create the ``ChainHandler`` and configure it from ``FileSettings``.

        Raises:
            KeyError: if a required ``FileSettings`` entry is missing.
        """
        file_settings = self._yaml_config["FileSettings"]

        # Process MCMC chain
        self._file_handler = ChainHandler(file_settings["FileName"],
                                          file_settings["ChainName"],
                                          file_settings["Verbose"])

        self._file_handler.ignore_plots(file_settings["IgnoredParameters"])
        self._file_handler.add_additional_plots(file_settings["ParameterNames"])
        self._file_handler.add_additional_plots(file_settings["LabelName"], True)

        self._file_handler.add_new_cuts(file_settings["ParameterCuts"])

        self._file_handler.convert_ttree_to_array()

    def setup_ml_interface(self) -> None:
        """Build the ML interface named in ``FitterSettings`` and run it.

        ``FitterPackage`` is matched case-insensitively against ``"scikit"``
        and ``"tensorflow"``. When ``AddFromExternalModel`` is truthy the model
        is loaded from ``ExternalModel``; otherwise a train/test split is
        made, the model is trained, tested and saved to ``ModelOutputName``.

        Raises:
            RuntimeError: if ``setup_file_handler`` has not been run yet.
            ValueError: if ``FitterPackage`` is not a recognised package.
            KeyError: if a required configuration entry is missing.
        """
        if self._file_handler is None:
            # RuntimeError is still caught by ``except Exception`` callers.
            raise RuntimeError("Cannot initialise ML interface without first setting up file handler!")

        file_settings = self._yaml_config["FileSettings"]
        fitter_settings = self._yaml_config["FitterSettings"]

        factory = MLFactory(self._file_handler, file_settings["LabelName"])
        package = fitter_settings["FitterPackage"].lower()
        if package == "scikit":
            build_interface = factory.setup_scikit_model
        elif package == "tensorflow":
            build_interface = factory.setup_tensorflow_model
        else:
            raise ValueError("Input not recognised!")

        # A missing or empty ``FitterKwargs:`` entry means "no extra arguments"
        # rather than a crash when unpacking ``None``.
        fitter_kwargs = fitter_settings.get("FitterKwargs") or {}
        self._interface = build_interface(fitter_settings["FitterName"], **fitter_kwargs)

        if fitter_settings.get("AddFromExternalModel"):
            self._interface.load_model(fitter_settings["ExternalModel"])
        else:
            # NOTE(review): the train/test split is only created on this
            # branch, so training, testing and saving are kept here too;
            # confirm this matches the intended flow for external models.
            self._interface.set_training_test_set(fitter_settings["TestSize"])

            self._interface.train_model()
            self._interface.test_model()
            self._interface.save_model(file_settings["ModelOutputName"])

    def __call__(self) -> None:
        """Run the full workflow: file handler first, then the ML interface."""
        self.setup_file_handler()
        self.setup_ml_interface()

    @property
    def chain_handler(self) -> "ChainHandler | None":
        """The configured chain handler, or ``None`` before ``setup_file_handler``."""
        return self._file_handler

    @property
    def ml_interface(self) -> "FmlInterface | None":
        """The ML interface, or ``None`` before ``setup_ml_interface`` built one."""
        return self._interface
34 changes: 31 additions & 3 deletions MachineLearningMCMC/machine_learning/fml_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
import matplotlib.pyplot as plt
import scipy.stats as stats
import numpy as np
from astropy.visualization import LogStretch
from astropy.visualization.mpl_normalize import ImageNormalize

import pickle

Expand Down Expand Up @@ -60,6 +58,10 @@ def model(self)->Any:
# Returns model being used
return self._model

@property
def training_data(self)->pd.DataFrame:
    # Returns the training data currently held by this interface
    return self._training_data

def add_model(self, ml_model: Any)->None:
# Add ML model into your interface
self._model = ml_model
Expand Down Expand Up @@ -89,10 +91,28 @@ def load_model(self, input_file: str):
print(f"Attempting to load file from {input_file}")
with open(input_file, 'r') as f:
self._model = pickle.load(f)

def test_model(self):
    """Predict on the stored test data and hand the result to ``evaluate_model``.

    Raises:
        ValueError: if no model has been set, or if the test data or the
            test labels are missing.
    """
    if self._model is None:
        raise ValueError("No Model has been set!")

    test_inputs = (self._test_data, self._test_labels)
    if any(item is None for item in test_inputs):
        raise ValueError("No test data set")

    predicted = self.model_predict(self._test_data)
    # Transpose and take row 0, i.e. the first label column as a 1-D array.
    expected = self._test_labels.to_numpy().T[0]

    self.evaluate_model(predicted, expected)


def evaluate_model(self, predicted_values, true_values, outfile: str=""):
print(predicted_values)
print(true_values)

print(f"Mean Absolute Error : {metrics.mean_absolute_error(predicted_values,true_values)}")


lobf = np.poly1d(np.polyfit(predicted_values, true_values, 1))

print(f"Line of best fit : y={lobf.c[0]}x + {lobf.c[1]}")
Expand Down Expand Up @@ -124,4 +144,12 @@ def evaluate_model(self, predicted_values, true_values, outfile: str=""):

print(f"Saving QQ to {outfile}")

fig.savefig(outfile)
fig.savefig(outfile)
plt.close()

# Gonna draw a hist
difs = true_values-predicted_values
print(f"mean: {np.mean(difs)}, std dev: {np.std(difs)}")
plt.hist(difs, bins=100, density=True, range=(np.std(difs)*-5, np.std(difs)*5))
plt.xlabel("True - Pred")
plt.savefig(f"diffs_5sigma_range_{outfile}")
34 changes: 23 additions & 11 deletions MachineLearningMCMC/machine_learning/ml_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@
from typing import Any, Dict

from machine_learning.scikit_interface import SciKitInterface
from machine_learning.tf_interface import TfInterface
import sklearn.ensemble as ske
import tensorflow.keras as tfk

from file_handling.chain_handler import ChainHandler
from functools import partial



class MLFactory:
# Implement algorithms here
Expand All @@ -22,7 +21,8 @@ class MLFactory:
"histboost" : ske.HistGradientBoostingRegressor
},
"tensorflow":
{
{
"sequential" : tfk.Sequential
}
}

Expand All @@ -37,26 +37,38 @@ def __setup_package_factory(self, package: str, algorithm: str, **kwargs):
Rough method for setting up a package
"""


package = package.lower()
if package not in self.__IMPLEMENTED_ALGORITHMS:
if package not in self.__IMPLEMENTED_ALGORITHMS.keys():
raise ValueError(f"{package} not included, currently accepted packages are :\n \
{list(self.__IMPLEMENTED_ALGORITHMS.keys())}")

algorithm = algorithm.lower()

if algorithm not in self.__IMPLEMENTED_ALGORITHMS[package]:
if algorithm not in self.__IMPLEMENTED_ALGORITHMS[package].keys():
raise ValueError(f"{algorithm} not implemented for {package}, currently accepted algorithms for {package} are:\n \
{list(self.__IMPLEMENTED_ALGORITHMS[package].keys())}")

return self.__IMPLEMENTED_ALGORITHMS[package][algorithm](**kwargs)
return self.__IMPLEMENTED_ALGORITHMS[package][algorithm](*kwargs)

def setup_scikit_model(self, algorithm: str, **kwargs)->SciKitInterface:
    """Return a ``SciKitInterface`` wrapping the requested scikit algorithm.

    ``algorithm`` and ``kwargs`` are forwarded to the package factory, and
    the object it returns is attached to the interface with ``add_model``.
    """
    sk_interface = SciKitInterface(self._chain, self._prediction_variable)
    sk_model = self.__setup_package_factory(package="scikit", algorithm=algorithm, **kwargs)
    sk_interface.add_model(sk_model)
    return sk_interface

def setup_tensorflow_model(self, algorithm: str, network_structure: Dict[str, Any], **kwargs):
model = self.__setup_package_factory(package="tesnorflow", algorithm=algorithm, kwargs=kwargs)
return model

def setup_tensorflow_model(self, algorithm: str, **kwargs):
    """Return a ``TfInterface`` assembled from the given settings.

    Reads three entries from ``kwargs``:
      * ``"Layers"``: iterable of mappings; the first key of each mapping is
        the layer id and its value is passed on as that layer's arguments.
      * ``"BuildSettings"``: passed to ``build_model``.
      * ``"FitSettings"``: passed to ``set_training_settings``.

    Raises:
        KeyError: if any of the three entries is missing.
    """
    tf_interface = TfInterface(self._chain, self._prediction_variable)

    # The factory is called with no extra arguments; layers are added below.
    base_model = self.__setup_package_factory(package="tensorflow", algorithm=algorithm)
    tf_interface.add_model(base_model)

    for layer_spec in kwargs["Layers"]:
        layer_name = list(layer_spec.keys())[0]
        layer_options = layer_spec[layer_name]
        tf_interface.add_layer(layer_name, layer_options)

    tf_interface.build_model(kwargs["BuildSettings"])
    tf_interface.set_training_settings(kwargs["FitSettings"])

    return tf_interface
19 changes: 6 additions & 13 deletions MachineLearningMCMC/machine_learning/scikit_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
from file_handling.chain_handler import ChainHandler
from machine_learning.fml_interface import FmlInterface

"""
TODO:
- Add staged predict
"""

class SciKitInterface(FmlInterface):
def __init__(self, chain: ChainHandler, prediction_variable: str) -> None:
super().__init__(chain, prediction_variable)
Expand All @@ -25,16 +30,4 @@ def model_predict(self, test_data: DataFrame):
raise ValueError("No Model has been set!")

return self._model.predict(test_data)

def test_model(self):

if self._model is None:
raise ValueError("No Model has been set!")

if self._test_data is None or self._test_labels is None:
raise ValueError("No training data set")

prediction = self.model_predict(self._test_data)
test_as_numpy = self._test_labels.to_numpy().T[0]

self.evaluate_model(prediction, test_as_numpy)

53 changes: 53 additions & 0 deletions MachineLearningMCMC/machine_learning/tf_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Let's make a tensor flow interface!
from typing import Any
from machine_learning.fml_interface import FmlInterface
import tensorflow as tf
from file_handling.chain_handler import ChainHandler

class TfInterface(FmlInterface):
    """``FmlInterface`` implementation backed by a TensorFlow/Keras model.

    Layers are collected with ``add_layer`` and attached to the model in
    ``build_model``, which also compiles it. Keyword arguments for ``fit``
    are stored with ``set_training_settings``.
    """

    # Layer ids (lower case) mapped to the Keras layer class they create.
    __TF_LAYER_IMPLEMENTATIONS = {
        "dense": tf.keras.layers.Dense,
        "dropout": tf.keras.layers.Dropout
    }

    def __init__(self, chain: ChainHandler, prediction_variable: str) -> None:
        super().__init__(chain, prediction_variable)

        self._model = None
        self._layers = []
        self._training_settings = {}

    def add_layer(self, layer_id, layer_args):
        """Queue a layer of type ``layer_id`` built with ``**layer_args``.

        ``layer_id`` is matched case-insensitively.

        Raises:
            ValueError: if ``layer_id`` is not a supported layer type.
        """
        # Normalise once so validation and lookup use the same key; checking
        # the raw id would reject spellings such as "Dense".
        layer_key = layer_id.lower()
        if layer_key not in self.__TF_LAYER_IMPLEMENTATIONS:
            raise ValueError(f"{layer_id} not implemented yet!")

        self._layers.append(self.__TF_LAYER_IMPLEMENTATIONS[layer_key](**layer_args))

    def build_model(self, kwargs_dict):
        """Add the queued layers to the model, build it and compile it.

        ``kwargs_dict`` is unpacked into the model's ``compile`` call.

        Raises:
            ValueError: if no model is set or no layers were queued.
        """
        if self._model is None or not self._layers:
            raise ValueError("No model can be built! Please setup model and layers")

        for layer in self._layers:
            self._model.add(layer)

        self._model.build()

        self._model.compile(**kwargs_dict)

    def set_training_settings(self, kwargs):
        """Store the keyword arguments later passed to ``fit``."""
        self._training_settings = kwargs

    def train_model(self):
        """Fit the model on the training data and labels held by the interface."""
        # NOTE(review): _training_data / _training_labels are not set in this
        # class; presumably the base class provides them - confirm.
        self._model.fit(self._training_data, self._training_labels, **self._training_settings)

    def save_model(self, output_file: str):
        """Save the model to ``output_file`` via the model's ``save`` method."""
        self._model.save(output_file)

    def load_model(self, input_file: str):
        """Load a Keras model previously written by ``save_model``."""
        # tf.saved_model.load does not return a Keras model, so fit / save /
        # predict_on_batch would be unavailable afterwards; the Keras loader
        # is the counterpart of ``model.save``.
        self._model = tf.keras.models.load_model(input_file)

    def model_predict(self, testing_data):
        """Return the first row of the transposed ``predict_on_batch`` output."""
        # Hacky but means it's consistent with sci-kit interface
        return self._model.predict_on_batch(testing_data).T[0]
Loading