forked from JasonGlazer/epXML2CSV
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathepXML2CSV.py
253 lines (217 loc) · 10.4 KB
/
epXML2CSV.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
"""
epXML2CSV
A utility to extract numbers from the tabular XML file created by EnergyPlus and put
them into a CSV format. The utility is especially useful when extracting data from
a large number of simulation results. The utility collects values from all XML files
in the directory or from a specific XML file.
epXML2CSV csvfilename extractfilename <xmlfilename>
If xmlfilename is provided, extract values from just that file. If the xmlfilename
is not specified, than etract values from all xml files in the directory.
A row is generated for the xmlfilename or for each xmlfile in the directory.
The columns correspond to each item extracted.
The extractfilename format is shown below and each row of the file
corresponds to one column of the resulting CSV file.
userHeading, reportNameString, forString, subReportString, nameString, headingString
userHeading, reportNameString, forString, subReportString, nameString, headingString
userHeading, reportNameString, forString, subReportString, nameString, headingString
...etc..
If the forString is * then new tables are generated at the end of the file for each
consisting of the one line for each file and one column for each possible forString
found across the files.
If the nameString is in the form #sum-string then the string is search for for all
all the values and take the sum of all records that include the string somewhere.
Copyright (c) 2015, Jason Glazer
All rights reserved.
Redistribution and use in source and binary forms, with or
without modification, are permitted provided that the
following conditions are met:
1. Redistributions of source code must retain the above
copyright notice, this list of conditions and the following
disclaimer.
2. Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials
provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
import xml.etree.cElementTree as ET
import sys
import os
import string
import csv
from pprint import pprint
from sets import Set
# Main function that returns a string based on the extract info
def getTextFromEPXML(root, reportS, forS, subRepS, nameS, headS):
for repEl in root.findall('./' + reportS):
if repEl.find('for').text == forS:
#special type of #sum- extraction that sums value when matches are found
if nameS[:5] == '#sum-':
#print '>report: ', reportS
#print '>for: ', forS
#print '>subRep: ', subRepS
#print '>name: ', nameS
#print '>headS: ', headS
sum = 0.0
searchString = nameS[5:].lower()
#print "--searchString: ", searchString
for subRepEl in repEl.findall('./' + subRepS):
#print subRepEl.tag
for child in subRepEl:
#print " ",child.tag, child.text
if child.text.lower() == searchString:
try:
resultString = subRepEl.find(headS).text
#print resultString
if is_number(resultString):
resultVal = float(resultString)
sum = sum + resultVal
except AttributeError:
resultString = ''
return str(sum)
else:
for subRepEl in repEl.findall('./' + subRepS):
# normal extraction of the value
if subRepEl.find('name').text == nameS:
try:
resultStringRaw = subRepEl.find(headS).text
resultString = resultStringRaw.replace(","," ").strip()
except:
resultString = 'NAME NOT FOUND'
if resultString != None:
return resultString
return "~"
def is_number(s): # used by IL05
try:
float(s)
return True
except ValueError:
return False
def epXML2CSV(argv=None):
if argv == None:
argv = sys.argv
if len(argv) == 3:
[progName, csvFileName, extractFileName] = argv
listOfXmlFiles =[]
elif len(argv) == 4:
[progName, csvFileName, extractFileName,specXmlFileName] = argv
listOfXmlFiles = [specXmlFileName,]
if not os.path.exists(extractFileName):
print "epXML2CSV Specified XML file file not found: ", specXmlFileName
sys.exit(1)
if not os.path.exists(extractFileName):
print "epXML2CSV Extract file not found: ", extractFileName
sys.exit(2)
extractLines = []
extractFile = open(extractFileName, 'r')
extractReader = csv.reader(extractFile, delimiter=',')
for row in extractReader:
if row:
if row[0][0] != "#": #remove rows that start with comment symbol "#"
extractLines.append(map(string.strip, row))
extractFile.close()
try:
csvFile = open(csvFileName, 'wb')
except IOError:
print "epXML2CSV The csv output file appears to be open in another program: ", csvFileName
sys.exit(3)
csvWriter = csv.writer(csvFile, delimiter=',')
firstHeadings = []
listOfPulledUsingForWild = []
userHeadingForWild = []
# if no specific XML file was specified, then make list of all files in directory
if len(listOfXmlFiles) == 0:
for xmlFileName in os.listdir("."):
if xmlFileName.endswith(".xml"):
listOfXmlFiles.append(xmlFileName)
for xmlFileName in listOfXmlFiles:
# The following lists will contain the output for the CSV file, starting with name of XML file
Headings = ["FileName"]
Pulled = [xmlFileName]
# the heavy lifting of parsing the XML file is done on the next line by the ElementTree library
if not os.path.exists(xmlFileName):
print "epXML2CSV File not found: ", xmlFileName
continue
tree = ET.parse(xmlFileName)
root = tree.getroot()
# ----------- Get the header information -------------
# <BuildingName>Building</BuildingName>
# <EnvironmentName>Chicago Ohare Intl Ap IL USA TMY3 WMO#=725300</EnvironmentName>
# <WeatherFileLocationTitle>Chicago Ohare Intl Ap IL USA TMY3 WMO#=725300</WeatherFileLocationTitle>
# <ProgramVersion>EnergyPlus-Windows-32 8.0.0.008, YMD=2013.05.16 11:07</ProgramVersion>
# <SimulationTimestamp>
# <Date>
# 2013-05-16
# </Date>
# <Time>
# 11:07:53
# </Time>
# </SimulationTimestamp>
Headings.append('BuildingName')
Pulled.append(root.find('BuildingName').text)
Headings.append('EnvironmentName')
Pulled.append(root.find('EnvironmentName').text)
Headings.append('WeatherFileLocationTitle')
Pulled.append(root.find('WeatherFileLocationTitle').text)
Headings.append('ProgramVersion')
Pulled.append(root.find('ProgramVersion').text.replace(","," "))
Headings.append('SimulationTimestamp')
Pulled.append(root.find('SimulationTimestamp').find('Date').text.strip() + ' ' + root.find('SimulationTimestamp').find('Time').text.strip())
for [userHeadingStr, reportStr, forStr, subReportStr, nameStr, headingStr] in extractLines:
#when using a wildcard on the FOR an extra column is needed one for the name of each FOR
if forStr == '*':
#loop through all instances of the report for each "for"
forFounds = []
PulledFromFor = {}
for repEl in root.findall('./' + reportStr):
forFounds.append(repEl.find('for').text)
for forFound in forFounds:
PulledFromFor[forFound] = getTextFromEPXML(root, reportStr, forFound, subReportStr, nameStr, headingStr)
listOfPulledUsingForWild.append([xmlFileName,userHeadingStr,PulledFromFor])
if userHeadingStr not in userHeadingForWild:
userHeadingForWild.append(userHeadingStr)
Pulled.append('SEE BELOW') # a filler to make sure a space is taken
else:
Pulled.append(getTextFromEPXML(root, reportStr, forStr, subReportStr, nameStr, headingStr))
Headings.append(userHeadingStr)
if Headings != firstHeadings:
firstHeadings = Headings
csvWriter.writerow(Headings)
csvWriter.writerow(Pulled)
print "File: " + xmlFileName + " Number of values: " + str(len(Pulled))
# pprint(listOfPulledUsingForWild,stream=csvFile)
for userHeading in userHeadingForWild:
#create a table for each user defined heading with columns of FOR's and rows of the filenames
csvFile.write('\n') #leave a blank line
csvFile.write('\n') #leave a blank line
csvFile.write(userHeading + '\n')
prevKeys = []
for [file,userHeadingInList,resDict] in listOfPulledUsingForWild:
if userHeading == userHeadingInList:
curKeys = resDict.keys()
if curKeys != prevKeys: #only print a new list of
csvFile.write('\n') #leave a blank line
row = ['File',]
row.extend(curKeys)
csvWriter.writerow(row)
prevKeys = curKeys
row = []
row.append(file)
for curKey in curKeys:
row.append(resDict[curKey])
csvWriter.writerow(row)
csvFile.close()
if __name__ == "__main__":
sys.exit(epXML2CSV())