-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathsplit.py
86 lines (80 loc) · 3.41 KB
/
split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python3
import argparse, string, sys
import zipfile, os
import random
import shutil
# NOTE: Only run this script from a directory that contains folders named
# exactly "Benign" and "Malicious" (the names are case-sensitive).
# Class-label folders expected in the current working directory.
CATEGORIES = ("Benign", "Malicious")

# Training share used when -t/--training-percentage is not given, and the
# inclusive range accepted when it is.
DEFAULT_TRAINING_PERCENT = 50
MIN_TRAINING_PERCENT = 5
MAX_TRAINING_PERCENT = 95


def _parse_training_percentage(argv):
    """Return the percentage of each category that goes to training.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        The integer passed with ``-t/--training-percentage``, or 50 when the
        option is absent.

    Raises:
        SystemExit: With status 1 when an unrecognised ``-``-prefixed
            argument is present or the percentage lies outside 5-95.
            argparse itself exits when the value is not an integer.
    """
    parser = argparse.ArgumentParser(description='split dataset')
    parser.add_argument(
        '-t', '--training-percentage', type=int, required=False,
        help='place COUNT percent of data into training folder')
    options, extras = parser.parse_known_args(argv)

    # parse_known_args() tolerates unknown options; reject them explicitly.
    # startswith() is used instead of indexing so an empty argument ("")
    # cannot raise IndexError.
    for arg in extras:
        if arg.startswith('-'):
            sys.stderr.write("split: invalid option -- '" + arg + "'\n")
            sys.exit(1)

    requested = options.training_percentage
    # Compare against None rather than relying on truthiness: "-t 0" must be
    # rejected as out of range, not silently replaced by the default.
    if requested is None:
        return DEFAULT_TRAINING_PERCENT
    if requested > MAX_TRAINING_PERCENT:
        sys.stderr.write("too much data going to training, choose a percentage from 5-95\n")
        sys.exit(1)
    if requested < MIN_TRAINING_PERCENT:
        sys.stderr.write("too little data going to training, choose a percentage from 5-95\n")
        sys.exit(1)
    return requested


def _extract_archives(directory):
    """Unpack every ``.zip`` archive located directly inside *directory*.

    For ``<directory>/<name>.zip`` a folder ``<directory>/<name>`` is created
    and the archive members are extracted into *directory*.  An archive whose
    ``<name>`` folder already exists is skipped.  A file that is not a valid
    zip archive is reported on stdout and deleted.  A successfully extracted
    archive is left in place.

    Args:
        directory: Folder to scan; all paths are resolved relative to it.
    """
    for filename in os.listdir(directory):
        if not filename.endswith(".zip"):
            continue
        # os.listdir() yields bare names, so every path must be joined with
        # the folder it came from before touching the filesystem.
        archive_path = os.path.join(directory, filename)
        marker_dir = os.path.join(directory, os.path.splitext(filename)[0])
        if os.path.isdir(marker_dir):
            continue  # treated as already extracted
        try:
            # NOTE(review): archives are extracted without inspecting their
            # member list; confirm that is acceptable for untrusted samples.
            with zipfile.ZipFile(archive_path) as archive:
                # Created even if the archive has no top-level folder of the
                # same name, so the skip check above works on the next run.
                os.mkdir(marker_dir)
                archive.extractall(directory)
        except zipfile.BadZipFile:
            print("BAD ZIP: " + filename)
            try:
                os.remove(archive_path)
            except FileNotFoundError:
                # Already gone: nothing to clean up.  Any other OSError
                # (permissions, ...) still propagates.
                pass


def _split_category(category, percent):
    """Move the entries of *category* into ``Training/`` and ``Testing/``.

    ``round(count * percent / 100)`` randomly chosen entries go to
    ``Training/<category>``; everything left goes to ``Testing/<category>``.

    Args:
        category: Name of the source folder in the working directory.
        percent: Share of the entries, in percent, that goes to training.
    """
    training_dir = os.path.join("Training", category)
    testing_dir = os.path.join("Testing", category)
    # exist_ok lets the script be re-run after an earlier (partial) split.
    os.makedirs(testing_dir, exist_ok=True)
    os.makedirs(training_dir, exist_ok=True)

    entries = os.listdir(category)
    training_count = round(len(entries) * (percent / 100))
    for entry in random.sample(entries, training_count):
        shutil.move(os.path.join(category, entry), training_dir)

    # Whatever was not sampled is still in the source folder.  shutil.move is
    # used for both halves: it also works across filesystems and refuses to
    # overwrite an entry that already exists in the destination folder.
    for entry in os.listdir(category):
        shutil.move(os.path.join(category, entry), testing_dir)


def main(argv=None):
    """Extract the zipped samples, then split both categories.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.
    """
    if argv is None:
        argv = sys.argv[1:]
    percent = _parse_training_percentage(argv)
    # Extract everything first so a missing category folder fails before any
    # entry has been moved.
    for category in CATEGORIES:
        _extract_archives(category)
    for category in CATEGORIES:
        _split_category(category, percent)


if __name__ == "__main__":
    main()