5
5
6
6
import pandas as pd
7
7
8
- from crowdsourcer .models import Option , Question , QuestionGroup , Section
8
+ from crowdsourcer .models import MarkingSession , Option , Question , QuestionGroup , Section
9
9
10
10
11
11
class Command (BaseCommand ):
12
12
help = "import questions"
13
13
14
- question_file = settings .BASE_DIR / "data" / "combined_authority_questions.xlsx"
15
-
16
14
column_names = [
17
15
"question_no" ,
18
16
"topic" ,
@@ -29,34 +27,129 @@ class Command(BaseCommand):
29
27
30
28
# get round limits on length of sheet names
31
29
sheet_map = {
32
- "Buildings & Heating & Green Skills" : "Buildings & Heating & Green Ski" ,
30
+ "Buildings & Heating & Green Skills (CA)" : "B&H CA" ,
31
+ "Collaboration & Engagement (CA)" : "C&E CA" ,
32
+ "Governance & Finance (CA)" : "G&F CA" ,
33
33
}
34
34
35
35
def add_arguments(self, parser):
    """Register the command-line options understood by this command.

    Options are declared as (flags, keyword-arguments) pairs and added to
    ``parser`` in the order listed, so ``--help`` output keeps that order.
    """
    option_specs = (
        (
            ("-q", "--quiet"),
            {"action": "store_true", "help": "Silence progress bars."},
        ),
        (
            ("--file",),
            {"action": "store", "help": "Excel file containing the questions"},
        ),
        (
            ("--session",),
            {"action": "store", "help": "Marking session to use questions with"},
        ),
        (
            ("--text_only",),
            {
                "action": "store_true",
                "help": "Only update question text, criteria and clarifications",
            },
        ),
        (
            ("--column_list",),
            {"action": "store", "help": "file with list of column names"},
        ),
    )

    for flags, options in option_specs:
        parser.add_argument(*flags, **options)
56
+
57
def get_column_names(self, **kwargs):
    """Optionally replace ``self.column_names`` with names read from a CSV file.

    The ``column_list`` keyword argument names a CSV file located under
    ``settings.BASE_DIR / "data"``. The values in that file's ``Column``
    column become the new ``self.column_names``.

    ``self.column_names`` is left untouched when ``column_list`` is not
    supplied, or when the named file does not exist (a message is then
    written to ``self.stderr``).
    """
    column_list = kwargs.get("column_list")

    # The option is optional: return before building the path, because
    # joining the data directory with None raises TypeError.
    if not column_list:
        return

    column_file = settings.BASE_DIR / "data" / column_list
    if not column_file.exists():
        self.stderr.write(
            f"file does not exist: {column_file}, using standard columns"
        )
        return

    df = pd.read_csv(column_file)
    self.column_names = df["Column"].tolist()
45
72
46
73
def handle (self , quiet : bool = False , * args , ** kwargs ):
47
- group = QuestionGroup .objects .get (description = "Combined Authority" )
74
+ file = kwargs .get ("file" , None )
75
+
76
+ if file is None :
77
+ self .stderr .write ("please supply a file name" )
78
+ return
79
+
80
+ self .question_file = settings .BASE_DIR / "data" / file
48
81
49
- for section in Section .objects .filter (title__contains = "(CA)" ):
50
- title = section .title .replace (" (CA)" , "" )
82
+ session_label = kwargs .get ("session" , None )
83
+ try :
84
+ session = MarkingSession .objects .get (label = session_label )
85
+ except MarkingSession .DoesNotExist :
86
+ self .stderr .write (f"No session with that name: { session_label } " )
87
+ return
88
+
89
+ group = QuestionGroup .objects .get (
90
+ description = "Combined Authority" , marking_session = session
91
+ )
92
+
93
+ self .get_column_names (** kwargs )
94
+
95
+ for section in Section .objects .filter (
96
+ marking_session = session , title__contains = "(CA)"
97
+ ):
98
+ header = 2
99
+ sheet_name = self .sheet_map .get (section .title , section .title )
100
+ print (sheet_name )
51
101
df = pd .read_excel (
52
102
self .question_file ,
53
- sheet_name = self .sheet_map .get (title , title ),
54
- header = 2 ,
55
- # remove blank and hidden notes columns
103
+ sheet_name = sheet_name ,
104
+ )
105
+
106
+ if "Question" in df .columns :
107
+ header = 0
108
+ else :
109
+ found_header = False
110
+ for index , row in df .iterrows ():
111
+ for i in [2 , 3 ]:
112
+ q_cell = row .iat [i ]
113
+ if type (q_cell ) == str and q_cell .strip () == "Question" :
114
+ header = index + 1
115
+ found_header = True
116
+ break
117
+
118
+ if found_header :
119
+ break
120
+
121
+ if index > 5 :
122
+ print (f"Did not find header in { section } " )
123
+ break
124
+
125
+ df = pd .read_excel (
126
+ self .question_file ,
127
+ sheet_name = sheet_name ,
128
+ header = header ,
56
129
usecols = lambda name : name != "Notes" and "Unnamed" not in name ,
57
130
)
58
131
59
132
df = df .dropna (axis = "index" , how = "all" )
133
+ drop_cols = [
134
+ "Climate Justice/Adaptation Tag" ,
135
+ "Drop down box options for no mark awarded (internal)" ,
136
+ "Is this question or criteria changing?" ,
137
+ "Change proposed" ,
138
+ "New Criteria" ,
139
+ "Clarifications" ,
140
+ "2023 Scorecards Criteria" ,
141
+ "2023 Scorecards Clarifications" ,
142
+ "2023 Criteria" ,
143
+ "2023 Clarifications" ,
144
+ "Previous Criteria from 2023 Scorecards" ,
145
+ "Type" ,
146
+ "Edits" ,
147
+ "Total Points Available when weighted" ,
148
+ "Weighting" ,
149
+ ]
150
+ for col in drop_cols :
151
+ if col in df .columns :
152
+ df = df .drop (col , axis = 1 )
60
153
61
154
columns = list (self .column_names )
62
155
options = len (df .columns ) - len (self .column_names ) + 1
@@ -69,6 +162,9 @@ def handle(self, quiet: bool = False, *args, **kwargs):
69
162
if pd .isna (row ["question_no" ]):
70
163
continue
71
164
165
+ if pd .isna (row ["question" ]):
166
+ continue
167
+
72
168
q_no = str (row ["question_no" ])
73
169
q_part = None
74
170
if pd .isna (q_no ):
@@ -86,7 +182,10 @@ def handle(self, quiet: bool = False, *args, **kwargs):
86
182
if row ["how_marked" ] == "FOI" :
87
183
how_marked = "foi"
88
184
question_type = "foi"
89
- elif "National Data" in row ["how_marked" ]:
185
+ elif (
186
+ "National Data" in row ["how_marked" ]
187
+ or "National data" in row ["how_marked" ]
188
+ ):
90
189
how_marked = "national_data"
91
190
question_type = "national_data"
92
191
@@ -105,7 +204,7 @@ def handle(self, quiet: bool = False, *args, **kwargs):
105
204
pass
106
205
else :
107
206
print (
108
- f"missing question type: { title } , { row ['question_no' ]} - { row ['question_type' ]} "
207
+ f"missing question type: { section . title } , { row ['question_no' ]} - { row ['question_type' ]} "
109
208
)
110
209
continue
111
210
0 commit comments