-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdataprocessing.py
180 lines (146 loc) · 8.1 KB
/
dataprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
#file for creating, loading and pre-processing the OLGA data
#uses an inp-file to create an OLGA-model and a bat-file to run the case. Retrieves the data from the generated ppl-file. The inp-file is modified between each run in a loop.
#OPTIONS
change_pressure=False #deside if input pressure should be changed
skewed_ql_in = False #decide if data set should be evenly distibuted w.r.t inlet liquid (oil) flowrate or not. Example of skewed dataset: 10% - [0.05 - 0.15] m3/s and 90% - [0.15 - 0.25] m3/s
add_noise = False
#TODO: select what parameters is wanted from the ppl-file and expand size of dataset
#TODO: add noise to dataset (!!!)
import pyfas as fa # library to extract results from ppl file
import pandas as pd
import subprocess # to run batchfile
import matplotlib.pyplot as plt #not needed atm
import numpy as np
pd.options.display.max_colwidth = 120 #needed(?) for the dataframes created from ppl-objects (pyfas)
# - load files -
path = 'C:/Users/aksel/NTNU/Prosjektoppgave/VFM_BlackOilModel/' # .. do not really need anymore as all files are in same folder
inpfilename = 'inputOLGA.inp' # name of input file used to create OLGA-object
pplfilename = 'inputOLGA.ppl' #name of output file
# Steps for generating data: 1) modify input file with new variables 2) run modified input file via bat file 3) extract results from ppl file and add to dataset
size = 20 #size of dataset
number_of_vars = 10 #number of variables to extract to the dataset. Change when you know this
column_names = ["inlet pressure [Pa]",
"outlet pressure [Pa]",
"inlet oil flowrate [m3/s]",
"outlet oil flowrate [m3/s]",
"inlet gas flowrate [m3/s]",
"outlet gas flowrate [m3/s]",
"inlet water flowrate [m3/s]",
"outlet water flowrate [m3/s]",
"inlet holdup [-]",
"outlet holdup [-]"] #change if changing the variables selected
dataset = np.zeros((size, number_of_vars))
#define range of the variables that chould be changed
wc = 0.3 #water cut (use same as in OLGA-file)
min_ql_in = 0.05 # [Sm3/s]
max_ql_in = 0.25 # [Sm3/s]
if change_pressure == True:
min_p_out = 5*10**5 # [Pa] (deside on range)
max_p_out = 15*10**5 # [Pa]
#make skewed data set so that: x% - [0.05 - 0.15] m3/s and (100-x)% - [0.15 - 0.25] m3/s
if skewed_ql_in== True:
split = 0.1 #determines how large portion of the data set should be in lower range of the values
max_ql_in_low = 0.15 # [Sm3/s] maximum value for low input flowrate dataset
min_ql_in_high = 0.15 # [Sm3/s] minmum value for high input flowrate dataset
# -- Loop for generating data --
for i in range(size):
print("Iteration at: ", i, " out of ", size)
#Generate random inlet oil flowrate and outlet pressure
#Flowrate
if skewed_ql_in == True: #make skewed dataset
distribution_parameter = np.random.uniform(0, 1)
if distribution_parameter < split: #generate data in the lower range of ql_in
ql_in = np.random.uniform(min_ql_in, max_ql_in_low)
else: #generate data in the hogher range of ql_in
ql_in = np.random.uniform(min_ql_in_high, max_ql_in)
else: #make evenly distributed values for ql_in
ql_in = np.random.uniform(min_ql_in, max_ql_in)
#Change from liquid inlet flow rate to oil inlet flow rate
qo_in0 = (1-wc)*ql_in #standard inlet oil flow rate
#Pressure
if change_pressure ==True:
p_out = np.random.uniform(min_p_out, max_p_out)
# -- STEP ONE: modify input file to have new flowrate and pressure (and other parameters if wanted) --
inputfile = open(path+inpfilename, "r")
lines = inputfile.readlines()
#change inlet oil flowrate
lines[47] = " FEEDNAME=Bofluid, PHASE=OIL, FEEDSTDFLOW={} Sm3/s \n".format(qo_in0)
#change outlet pressure
if change_pressure == True:
lines[70] = " TEMPERATURE=25 C, PRESSURE={} Pa, FLUID=Bofluid \n".format(p_out)
inputfile.close()
inputfile = open(path+inpfilename, "w")
inputfile.writelines(lines)
inputfile.close()
# -- STEP TWO: run the modified input file from batch file --
subprocess.call([r'C:/Users/aksel/NTNU/Prosjektoppgave/VFM_BlackOilModel/inputOLGA.bat']) #run inputfile via batchfile
# -- STEP THREE: extract data and add to dataset (keep control of how many variables you have in each ) --
ppl = fa.Ppl(path+pplfilename) #create ppl-object
# pressure
ppl.extract(15) #extract the pressures to the ppl object
pressures = ppl.data[15][1] # save the pressures to array
p_in = pressures[-1][0] #last timestep
#if change_pressure == False: #<-remove this?
p_out = pressures[-1][-1] # or give the specified value directly if known in advanced (as it is not supposed to change) (NOTE: outlet in ppl is not giving the same value as specified in the inp-file)
# Flow rates (extract at local conditions, not standard)
# gas voulme flow
ppl.extract(3)
gas_flow = ppl.data[3][1] # save the pressures to array
qg_in = gas_flow[-1][1] #last timestep, "first" value (disregard value at pos. 0 for flow rates)
qg_out = gas_flow[-1][-1] #last timestep, last value
# Volumetric flow rate oil
ppl.extract(5)
oil_flowrate = ppl.data[5][1] # save the pressures to array
qo_in = oil_flowrate[-1][1] #last timestep, first value
qo_out = oil_flowrate[-1][-1] #last timestep, last value
# Volumetric flow rate water
ppl.extract(6)
water_flowrate = ppl.data[6][1] # save the pressures to array
qw_in = water_flowrate[-1][1] #last timestep, last value
qw_out = water_flowrate[-1][-1] #last timestep, last value
# Holdup (use to derive void fraction for MLE)
ppl.extract(13)
holdup = ppl.data[13][1] # save the holdup to array
hol_in = holdup[-1][0] # last timestep, first value
hol_out = holdup[-1][-1] # last timestep, last value
#make array of values
arr = np.array([p_in, p_out, qo_in, qo_out, qg_in, qg_out, qw_in, qw_out, hol_in, hol_out]) #TODO: add additional values if needed
#add values to dataset
dataset[i] = arr
if add_noise: #noise should be added for the MLE to work !!
# - determin maximum error for each entity (99.73 percent of all values is within this, meaning it is 3*std_dev)
maximum_noise = 0.05 #percent of maximum value
#print(dataset)
max_p_error = maximum_noise * np.max(dataset[:,0]) # 5 percent of maximum of the measured inlet pressures
max_qo_error = maximum_noise * np.max(dataset[:,2]) # 5 percent of maximum of the measured inlet oil flow rate
max_qg_error = maximum_noise * np.max(dataset[:,5]) # 5 percent of maximum of the measured outlet gas flow rate
max_qw_error = maximum_noise * np.max(dataset[:,6]) # 5 percent of maximum of the measured inlet water flow rate
max_hu_error = maximum_noise * np.max(dataset[:,8]) # 5 percent of maximum of the measured inlet holdup
# - make error arrays and ensure 99.73 percent of all values is within this maximum error. Assume homoscedasticity
# inlet
noise_p_in = np.random.normal(0, max_p_error/3, size)
noise_qo_in = np.random.normal(0,max_qo_error/3, size)
noise_qg_in = np.random.normal(0,max_qg_error/3, size)
noise_qw_in = np.random.normal(0,max_qw_error/3, size)
noise_hu_in = np.random.normal(0,max_hu_error/3, size)
# outlet
noise_p_out = np.random.normal(0, max_p_error/3, size)
noise_qo_out = np.random.normal(0,max_qo_error/3, size)
noise_qg_out = np.random.normal(0,max_qg_error/3, size)
noise_qw_out = np.random.normal(0,max_qw_error/3, size)
noise_hu_out = np.random.normal(0,max_hu_error/3, size)
# - add the noise/error to the dataset array
dataset[:,0] += noise_p_in
dataset[:,1] += noise_p_out
dataset[:,2] += noise_qo_in
dataset[:,3] += noise_qo_out
dataset[:,4] += noise_qg_in
dataset[:,5] += noise_qg_out
dataset[:,6] += noise_qw_in
dataset[:,7] += noise_qw_out
dataset[:,8] += noise_hu_in
dataset[:,9] += noise_hu_out
# create dataframe from numpy array with dataset
df = pd.DataFrame(data = dataset, columns = column_names)
# Generate a csv file from the dataframe and save to data-folder
df.to_csv("data/dataset.csv", index=False)