-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSaveSalstatNative.py
145 lines (129 loc) · 4.65 KB
/
SaveSalstatNative.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
"""
SaveSalstatNative
A module to save and load Salstat data in a native XML format
(c) 2013, Alan James Salmoni
"""
import numpy
import datetime, pickle
from BeautifulSoup import BeautifulStoneSoup
def MakeString(vector):
    """Return the elements of ``vector`` as one comma-separated string.

    Every element is converted with ``str`` and the pieces are joined with
    a bare comma (no surrounding spaces). An empty ``vector`` yields ``''``.
    """
    return ','.join(map(str, vector))
def MakeVector(string):
    """Parse a comma-separated string of numbers into a numpy vector.

    ``string`` is split on commas and every piece is converted with
    ``float``. An empty (or whitespace-only) string gives an empty float
    array, so the output of ``MakeString`` for an empty vector can be read
    back.

    Raises ``ValueError`` if a piece cannot be converted to a float.
    """
    if not string.strip():
        # ''.split(',') yields [''], and float('') raises ValueError, so the
        # empty case has to be handled before splitting.
        return numpy.array([], dtype=float)
    return numpy.array([float(item) for item in string.split(',')])
def Label(key_value_pairs):
    """Build the ``<label>`` XML elements for a variable's value labels.

    ``key_value_pairs`` is a mapping from a value to its label text. One
    ``<label>`` element (with ``<labelvalue>`` and ``<labeltext>`` children)
    is produced per entry, in the mapping's iteration order.

    Returns the concatenated elements as a single string; an empty mapping
    gives ``''``.

    NOTE(review): values and texts are inserted verbatim, without XML
    escaping -- confirm labels never contain '<' or '&'.
    """
    template = ('\t\t\t<label>\n'
                '\t\t\t\t<labelvalue>%s</labelvalue>\n'
                '\t\t\t\t<labeltext>%s</labeltext>\n'
                '\t\t\t</label>\n')
    # Join once instead of growing a string inside the loop.
    return ''.join([template % (value, key_value_pairs[value])
                    for value in key_value_pairs])
def Data(data):
    """Return the ``<data>`` XML element holding a pickled copy of ``data``.

    ``data`` is serialised with ``pickle.dumps`` (default protocol) and the
    result is placed directly after the opening tag; a newline and three
    tabs precede the closing tag.

    NOTE(review): under Python 3 ``pickle.dumps`` returns bytes, so ``%s``
    embeds their ``repr`` rather than the raw pickle -- confirm the
    intended interpreter.
    """
    pickled = pickle.dumps(data)
    return '\t\t\t<data>%s\n\t\t\t</data>\n' % (pickled,)
def Variable(name, ivdv, labels, missingvalues, align, measure, data):
    """Return the ``<variable>`` XML element describing one variable.

    The children are written in this order: ``<name>``, ``<ivdv>``, the
    label elements produced by ``Label(labels)``, ``<missingvalues>``,
    ``<align>``, ``<measure>`` and finally the element produced by
    ``Data(data)``. Scalar arguments are inserted with ``%s`` formatting.
    """
    label_part = Label(labels)
    data_part = Data(data)
    pieces = [
        '\t\t<variable>\n\t\t\t<name>%s</name>\n' % (name),
        '\t\t\t<ivdv>%s</ivdv>\n' % (ivdv),
        label_part,
        '\t\t\t<missingvalues>%s</missingvalues>\n' % (missingvalues),
        '\t\t\t<align>%s</align>\n' % (align),
        '\t\t\t<measure>%s</measure>\n' % (measure),
        data_part,
        '\t\t</variable>\n',
    ]
    return ''.join(pieces)
def Variables(variables):
    """Return the ``<variables>`` XML section for a sequence of variables.

    Each item must expose ``name``, ``ivdv``, ``labels``, ``missingvalues``,
    ``align``, ``measure`` and ``data`` attributes; these are passed to
    ``Variable`` and the resulting elements are wrapped, in order, between
    the opening and closing ``<variables>`` tags.
    """
    chunks = ['\t<variables>\n']
    for item in variables:
        chunks.append(Variable(item.name, item.ivdv, item.labels,
                               item.missingvalues, item.align, item.measure,
                               item.data))
    chunks.append('\t</variables>\n')
    return ''.join(chunks)
def NativeDoc(version, fname, variables):
    """Return a complete ``<SalstatDoc>`` XML document as a string.

    The document starts with ``<version>``, ``<filename>`` and
    ``<date_created>`` elements (today's date formatted as YYYY-MM-DD),
    followed by the section produced by ``Variables(variables)`` and the
    closing ``</SalstatDoc>`` tag (no trailing newline).
    """
    # Date of generation, taken from the local clock.
    created = datetime.date.today().strftime("%Y-%m-%d")
    body = Variables(variables)
    header = ('<SalstatDoc>\n'
              '\t<version>%s</version>\n'
              '\t<filename>%s</filename>\n'
              '\t<date_created>%s</date_created>\n') % (version, fname, created)
    return header + body + '</SalstatDoc>'
class variableobj(object):
    """Plain container for one variable (column) and its metadata.

    Only ``name`` is required. The remaining attributes are the ones read
    by ``Variables`` when serialising; they start with placeholder values
    so that a freshly created object can be serialised without raising
    ``AttributeError``. Callers are expected to overwrite them.
    """

    def __init__(self, name):
        self.name = name
        # Per-instance containers (never shared between objects).
        self.labels = {}
        self.data = []
        # Placeholders matching the ones SaveNativeDoc writes.
        self.ivdv = "Not set"
        self.missingvalues = "Not set"
        self.align = "Not set"
        self.measure = "Not set"
def SaveNativeDoc(grid, filename):
    """Write the used columns of ``grid`` to ``filename`` as Salstat XML.

    For every column index reported by ``grid.GetUsedCols()`` a variable is
    built from the column label and ``grid.CleanData(col)``; metadata
    fields are written as the placeholder "Not set" because this function
    does not read any from the grid. ``filename`` is used exactly as given
    (no suffix is appended here).

    Returns True when the file was written and False when opening or
    writing it failed; the failure is otherwise swallowed, as a save is
    treated as best-effort.
    """
    # Only the column indices are needed from the returned pair.
    _, colnums = grid.GetUsedCols()
    variables = []
    for col in colnums:
        var = variableobj(grid.GetColLabelValue(col))
        var.labels = {}
        var.data = grid.CleanData(col)
        var.ivdv = "Not set"
        var.missingvalues = "Not set"
        var.measure = "Not set"
        var.align = "Not set"
        # Without this append the document is saved with no variables.
        variables.append(var)
    document = NativeDoc("20131022", filename, variables)
    try:
        # The context manager closes the file even if write() fails.
        with open(filename, 'w') as fout:
            fout.write(document)
    except (IOError, OSError, UnicodeError):
        return False
    return True
def LoadNativeDoc(grid, filename):
    """Fill ``grid`` from a Salstat native XML document.

    Each ``<variable>`` element becomes one column, in document order: the
    ``<name>`` text is set as the column label and the unpickled ``<data>``
    content is written cell by cell down that column.

    Returns True when the whole document was loaded and False when any
    step failed (unreadable file, missing tag, bad pickle, grid error). On
    failure the grid may already have been partially filled.
    """
    try:
        # The context manager closes the file even if read() fails.
        with open(filename, 'r') as fin:
            contents = fin.read()
        soup = BeautifulStoneSoup(contents)
        for idx, var in enumerate(soup.findAll('variable')):
            grid.SetColLabelValue(idx, var('name')[0].text)
            # SECURITY: pickle.loads can execute arbitrary code embedded in
            # the file -- only load documents from a trusted source.
            vector = pickle.loads(var('data')[0].text)
            for row, value in enumerate(vector):
                grid.SetCellValue(row, idx, value)
            # TODO: ivdv, align, missingvalues and labels are not restored
            # yet; the grid needs somewhere to hold that metadata first.
    except Exception:
        # Best-effort load: report failure instead of raising, but let
        # KeyboardInterrupt / SystemExit propagate.
        return False
    return True
if __name__ == '__main__':
    # Ad-hoc demo: build three sample variables with differing metadata.
    # Each row is (name, labels, ivdv, missingvalues, align, measure).
    samples = [
        ('Var001', {1: 'Label 1', 2: 'Label 2', 'a': 'Label a'},
         'Not yet', 'None yet', 'left', "Ordinal"),
        ('Var002', {1: 'Label a', 2: 'Label b', 'a': 'Label 123'},
         'IV', '-99', "CENTRE", "INTERVAL"),
        ('Var003', {1: 'Label 1', 2: 'Label 2', 'a': 'Label 456'},
         'DV', 'Missing', "Right", "nominal"),
    ]
    vars = []
    for name, labels, ivdv, missing, align, measure in samples:
        sample = variableobj(name)
        sample.labels = labels
        # Every variable gets its own array holding the same five values.
        sample.data = numpy.array(([2, 3.4, 4, 3.565, 7]))
        sample.ivdv = ivdv
        sample.missingvalues = missing
        sample.align = align
        sample.measure = measure
        vars.append(sample)
    var1, var2, var3 = vars

    # Disabled: write the sample document to disk.
    # n = NativeDoc("20131022",'filename001',vars)
    # fout = open('/Users/alansalmoni/sal.xml','w')
    # fout.write(n)
    # fout.close()

    # Push a random vector through MakeString and back through MakeVector;
    # the result is not checked or printed.
    vec = numpy.random.rand(400)
    vecstr = MakeString(vec)
    strvec = MakeVector(vecstr)