forked from trufflesecurity/trufflehog
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtruffleHog.py
120 lines (102 loc) · 4.26 KB
/
truffleHog.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env python
import shutil, sys, math, string, datetime, argparse, tempfile
from git import Repo
# On Python 2 only: reload ``sys`` and make UTF-8 the interpreter-wide
# default string encoding.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding('utf8')

# Alphabets handed to get_strings_of_set() / shannon_entropy() when looking
# for candidate secrets: the base64 alphabet (including the "=" padding
# character) and the hexadecimal digits in both letter cases.
BASE64_CHARS = (
    string.ascii_uppercase + string.ascii_lowercase + string.digits + "+/="
)
HEX_CHARS = "1234567890abcdefABCDEF"
def shannon_entropy(data, iterator):
    """
    Return the Shannon entropy (in bits) of ``data`` over the alphabet ``iterator``.

    Borrowed from http://blog.dkbza.org/2007/05/scanning-data-for-entropy-anomalies.html

    Only the characters yielded by ``iterator`` are considered; each one
    contributes ``-p * log2(p)`` where ``p`` is its relative frequency in
    ``data``. Returns 0 when ``data`` is empty or contains none of them.
    """
    if not data:
        return 0

    size = len(data)
    bits = 0
    for code_point in map(ord, iterator):
        occurrences = data.count(chr(code_point))
        probability = float(occurrences) / size
        # Symbols that never occur contribute nothing (and log(0) is undefined).
        if probability > 0:
            bits -= probability * math.log(probability, 2)
    return bits
def get_strings_of_set(word, char_set, threshold=20):
    """
    Extract the long runs of ``char_set`` characters contained in ``word``.

    ``word`` is scanned left to right and split at every character that is
    not in ``char_set``; each resulting run is kept only if it is strictly
    longer than ``threshold`` characters.

    :param word: text to scan.
    :param char_set: container of allowed characters (anything supporting ``in``).
    :param threshold: minimum length a run must exceed to be returned.
    :return: list of qualifying runs, in order of appearance.
    """
    strings = []
    run = []
    for char in word:
        if char in char_set:
            run.append(char)
            continue
        # A character outside the set terminates the current run. The caller's
        # threshold is honoured here too (this branch used to compare against
        # a hard-coded 20, ignoring the parameter).
        if len(run) > threshold:
            strings.append("".join(run))
        run = []
    # The word may end while a run is still open.
    if len(run) > threshold:
        strings.append("".join(run))
    return strings
class bcolors:
    """Named escape sequences used to colorize and style terminal output."""

    # Colors
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'

    # Reset sequence: emitted after a colored/styled span to end it
    ENDC = '\033[0m'

    # Text styles
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
def _highlight_high_entropy(diff_text):
    """
    Mark every high-entropy base64 or hex run in ``diff_text``.

    Each whitespace-separated word of each line is searched for long base64
    and hex runs; runs whose Shannon entropy exceeds 4.5 (base64) or 3 (hex)
    are wrapped in ``bcolors.WARNING`` / ``bcolors.ENDC``.

    :return: ``(found, highlighted_text)`` where ``found`` tells whether at
        least one run was flagged.
    """
    found = False
    highlighted = diff_text
    for line in diff_text.split("\n"):
        for word in line.split():
            for candidate in get_strings_of_set(word, BASE64_CHARS):
                if shannon_entropy(candidate, BASE64_CHARS) > 4.5:
                    found = True
                    highlighted = highlighted.replace(
                        candidate, bcolors.WARNING + candidate + bcolors.ENDC)
            for candidate in get_strings_of_set(word, HEX_CHARS):
                if shannon_entropy(candidate, HEX_CHARS) > 3:
                    found = True
                    highlighted = highlighted.replace(
                        candidate, bcolors.WARNING + candidate + bcolors.ENDC)
    return found, highlighted


def find_strings(git_url):
    """
    Clone ``git_url`` and print every diff that contains a high-entropy string.

    The repository is cloned into a temporary directory. For each fetched
    remote branch, consecutive commits yielded by ``repo.iter_commits()`` are
    diffed pairwise; each textual patch is scanned, and when something is
    flagged the commit date, branch, commit message and the highlighted patch
    are printed. The temporary clone is removed afterwards, even when an
    error interrupts the scan.

    :param git_url: URL (or path) passed to ``Repo.clone_from``.
    """
    project_path = tempfile.mkdtemp()
    try:
        Repo.clone_from(git_url, project_path)
        repo = Repo(project_path)

        # Commit pairs already diffed, so a pair reachable from several
        # branches is only reported once.
        already_searched = set()
        for remote_branch in repo.remotes.origin.fetch():
            # Drop only the leading "<remote>/" component so that branch
            # names which themselves contain "/" are kept intact.
            branch_name = str(remote_branch).split('/', 1)[1]
            try:
                repo.git.checkout(remote_branch, b=branch_name)
            except Exception:
                # Best effort: presumably this fails when a local branch of
                # that name already exists; scanning carries on with whatever
                # is currently checked out, as before. Unlike a bare
                # ``except:``, Ctrl-C and SystemExit still propagate.
                pass

            prev_commit = None
            for curr_commit in repo.iter_commits():
                if prev_commit is not None:
                    # avoid searching the same diffs
                    hashes = str(prev_commit) + str(curr_commit)
                    if hashes in already_searched:
                        prev_commit = curr_commit
                        continue
                    already_searched.add(hashes)

                    for blob in prev_commit.diff(curr_commit, create_patch=True):
                        # Decode once; undecodable bytes are replaced rather
                        # than aborting the whole scan.
                        printable_diff = blob.diff.decode('utf-8', 'replace')
                        if printable_diff.startswith("Binary files"):
                            continue
                        found_something, printable_diff = _highlight_high_entropy(printable_diff)
                        if found_something:
                            commit_time = datetime.datetime.fromtimestamp(
                                prev_commit.committed_date).strftime('%Y-%m-%d %H:%M:%S')
                            print(bcolors.OKGREEN + "Date: " + commit_time + bcolors.ENDC)
                            print(bcolors.OKGREEN + "Branch: " + branch_name + bcolors.ENDC)
                            print(bcolors.OKGREEN + "Commit: " + prev_commit.message + bcolors.ENDC)
                            print(printable_diff)
                prev_commit = curr_commit
    finally:
        # Always remove the temporary clone, even if cloning or scanning failed.
        shutil.rmtree(project_path)
if __name__ == "__main__":
    # Command-line entry point: takes one positional argument, the
    # repository URL, and hands it to find_strings().
    cli = argparse.ArgumentParser(
        description='Find secrets hidden in the depths of git.'
    )
    cli.add_argument('git_url', type=str, help='URL for secret searching')
    find_strings(cli.parse_args().git_url)