diff --git a/xgitguard/github-enterprise/enterprise_cred_detections.py b/xgitguard/github-enterprise/enterprise_cred_detections.py index e45b2a8..0e6e2aa 100644 --- a/xgitguard/github-enterprise/enterprise_cred_detections.py +++ b/xgitguard/github-enterprise/enterprise_cred_detections.py @@ -614,21 +614,11 @@ def run_detection( search_query_list = format_search_query_list(configs.secondary_keywords) if search_query_list: if ml_prediction: - # Train Model if not present Already - model_file = os.path.join( - configs.output_dir, "xgg_cred_rf_model_object.pickle" - ) - if os.path.exists(model_file): - logger.info( - f"Detection process will use Already persisted Trained Model present in: {model_file}" - ) - else: - logger.info( - f"No persisted Trained Model present. So training and persisting a model now" - ) - xgg_train_model( - training_data_file="cred_train.csv", model_name="xgg_cred_rf_" - ) + # Load BERT model and tokenizer + bert_model_path = "path_to_bert_model/bert_secret_detection_model" + tokenizer = BertTokenizer.from_pretrained(bert_model_path) + model = BertForSequenceClassification.from_pretrained(bert_model_path) + model.eval() # Set model to evaluation mode else: logger.info(f"No Search query to process. Ending.") sys.exit(1) diff --git a/xgitguard/github-enterprise/enterprise_key_detections.py b/xgitguard/github-enterprise/enterprise_key_detections.py index 95b27e4..3d2ff47 100644 --- a/xgitguard/github-enterprise/enterprise_key_detections.py +++ b/xgitguard/github-enterprise/enterprise_key_detections.py @@ -593,21 +593,11 @@ def run_detection( search_query_list = format_search_query_list(configs.secondary_keywords) if search_query_list: if ml_prediction: - # Train Model if not present Already - model_file = os.path.join( - configs.output_dir, "xgg_key_rf_model_object.pickle" - ) - if os.path.exists(model_file): - logger.info( - f"Detection process will use Already persisted Trained Model present in: {model_file}" - ) - else: - logger.info( - f"No persisted Trained Model present. So training and persisting a model now" - ) - xgg_train_model( - training_data_file="key_train.csv", model_name="xgg_key_rf_" - ) + # Load BERT model and tokenizer + bert_model_path = "path_to_bert_model/bert_secret_detection_model" + tokenizer = BertTokenizer.from_pretrained(bert_model_path) + model = BertForSequenceClassification.from_pretrained(bert_model_path) + model.eval() # Set model to evaluation mode else: logger.info(f"No Search query to process. Ending.") sys.exit(1) diff --git a/xgitguard/github-public/public_cred_detections.py b/xgitguard/github-public/public_cred_detections.py index a78e444..647fa16 100644 --- a/xgitguard/github-public/public_cred_detections.py +++ b/xgitguard/github-public/public_cred_detections.py @@ -628,22 +628,11 @@ def run_detection( ) if search_query_list: if ml_prediction: - # Train Model if not present Already - model_file = os.path.join( - configs.output_dir, "public_xgg_cred_rf_model_object.pickle" - ) - if os.path.exists(model_file): - logger.info( - f"Detection process will use Already persisted Trained Model present in: {model_file}" - ) - else: - logger.info( - f"No persisted Trained Model present. So training and persisting a model now" - ) - xgg_train_model( - training_data_file="public_cred_train.csv", - model_name="public_xgg_cred_rf_", - ) + # Load BERT model and tokenizer + bert_model_path = "path_to_bert_model/bert_secret_detection_model" + tokenizer = BertTokenizer.from_pretrained(bert_model_path) + model = BertForSequenceClassification.from_pretrained(bert_model_path) + model.eval() # Set model to evaluation mode else: logger.info(f"No Search query to process. Ending.") sys.exit(1) diff --git a/xgitguard/github-public/public_key_detections.py b/xgitguard/github-public/public_key_detections.py index 0829b6e..452fe77 100644 --- a/xgitguard/github-public/public_key_detections.py +++ b/xgitguard/github-public/public_key_detections.py @@ -603,22 +603,11 @@ def run_detection( ) if search_query_list: if ml_prediction: - # Train Model if not present Already - model_file = os.path.join( - configs.output_dir, "public_xgg_key_rf_model_object.pickle" - ) - if os.path.exists(model_file): - logger.info( - f"Detection process will use Already persisted Trained Model present in: {model_file}" - ) - else: - logger.info( - f"No persisted Trained Model present. So training and persisting a model now" - ) - xgg_train_model( - training_data_file="public_key_train.csv", - model_name="public_xgg_key_rf_", - ) + # Load BERT model and tokenizer + bert_model_path = "path_to_bert_model/bert_secret_detection_model" + tokenizer = BertTokenizer.from_pretrained(bert_model_path) + model = BertForSequenceClassification.from_pretrained(bert_model_path) + model.eval() # Set model to evaluation mode else: logger.info(f"No Search query to process. Ending.") sys.exit(1)