Skip to content

Commit 4305018

Browse files
committed
update CA question import
1 parent 76b0f5c commit 4305018

File tree

1 file changed

+111
-12
lines changed

1 file changed

+111
-12
lines changed

crowdsourcer/management/commands/import_combined_authority_questions.py

+111-12
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,12 @@
55

66
import pandas as pd
77

8-
from crowdsourcer.models import Option, Question, QuestionGroup, Section
8+
from crowdsourcer.models import MarkingSession, Option, Question, QuestionGroup, Section
99

1010

1111
class Command(BaseCommand):
1212
help = "import questions"
1313

14-
question_file = settings.BASE_DIR / "data" / "combined_authority_questions.xlsx"
15-
1614
column_names = [
1715
"question_no",
1816
"topic",
@@ -29,34 +27,129 @@ class Command(BaseCommand):
2927

3028
# Section titles can exceed Excel's 31-character limit on sheet names, so map each title to the shorter sheet name used in the workbook
3129
sheet_map = {
32-
"Buildings & Heating & Green Skills": "Buildings & Heating & Green Ski",
30+
"Buildings & Heating & Green Skills (CA)": "B&H CA",
31+
"Collaboration & Engagement (CA)": "C&E CA",
32+
"Governance & Finance (CA)": "G&F CA",
3333
}
3434

3535
def add_arguments(self, parser):
    """Register the command-line options understood by this command.

    Options, in registration order:
      -q / --quiet   flag; silences progress bars
      --file         Excel file containing the questions
      --session      marking session the questions are used with
      --text_only    flag; only update question text, criteria and
                     clarifications
      --column_list  file holding the list of column names
    """
    # Each entry is (flags, keyword arguments); the order here is the order
    # in which the options are registered and shown in --help.
    option_specs = (
        (
            ("-q", "--quiet"),
            {"action": "store_true", "help": "Silence progress bars."},
        ),
        (
            ("--file",),
            {"action": "store", "help": "Excel file containing the questions"},
        ),
        (
            ("--session",),
            {"action": "store", "help": "Marking session to use questions with"},
        ),
        (
            ("--text_only",),
            {
                "action": "store_true",
                "help": "Only update question text, criteria and clarifications",
            },
        ),
        (
            ("--column_list",),
            {"action": "store", "help": "file with list of column names"},
        ),
    )

    for flags, options in option_specs:
        parser.add_argument(*flags, **options)
56+
57+
def get_column_names(self, **kwargs):
    """Optionally replace ``self.column_names`` from a CSV column list.

    Reads the ``column_list`` keyword (the ``--column_list`` option), which
    names a CSV file inside ``settings.BASE_DIR / "data"``. The file must
    have a ``Column`` header; the values in that column become the new
    ``self.column_names``.

    If the option was not supplied, or the named file does not exist, the
    existing ``self.column_names`` are left untouched. A missing file is
    reported on stderr.

    Returns:
        None. The result is stored on ``self.column_names``.
    """
    column_list = kwargs.get("column_list")

    # The option is optional: check it *before* building a path from it,
    # otherwise joining a Path with None raises TypeError.
    if not column_list:
        return

    column_file = settings.BASE_DIR / "data" / column_list
    if not column_file.exists():
        self.stderr.write(
            f"file does not exist: {column_file}, using standard columns"
        )
        return

    df = pd.read_csv(column_file)
    # Take the whole "Column" column at once rather than appending row by row.
    self.column_names = df["Column"].tolist()
4572

4673
def handle(self, quiet: bool = False, *args, **kwargs):
47-
group = QuestionGroup.objects.get(description="Combined Authority")
74+
file = kwargs.get("file", None)
75+
76+
if file is None:
77+
self.stderr.write("please supply a file name")
78+
return
79+
80+
self.question_file = settings.BASE_DIR / "data" / file
4881

49-
for section in Section.objects.filter(title__contains="(CA)"):
50-
title = section.title.replace(" (CA)", "")
82+
session_label = kwargs.get("session", None)
83+
try:
84+
session = MarkingSession.objects.get(label=session_label)
85+
except MarkingSession.DoesNotExist:
86+
self.stderr.write(f"No session with that name: {session_label}")
87+
return
88+
89+
group = QuestionGroup.objects.get(
90+
description="Combined Authority", marking_session=session
91+
)
92+
93+
self.get_column_names(**kwargs)
94+
95+
for section in Section.objects.filter(
96+
marking_session=session, title__contains="(CA)"
97+
):
98+
header = 2
99+
sheet_name = self.sheet_map.get(section.title, section.title)
100+
print(sheet_name)
51101
df = pd.read_excel(
52102
self.question_file,
53-
sheet_name=self.sheet_map.get(title, title),
54-
header=2,
55-
# remove blank and hidden notes columns
103+
sheet_name=sheet_name,
104+
)
105+
106+
if "Question" in df.columns:
107+
header = 0
108+
else:
109+
found_header = False
110+
for index, row in df.iterrows():
111+
for i in [2, 3]:
112+
q_cell = row.iat[i]
113+
if type(q_cell) == str and q_cell.strip() == "Question":
114+
header = index + 1
115+
found_header = True
116+
break
117+
118+
if found_header:
119+
break
120+
121+
if index > 5:
122+
print(f"Did not find header in {section}")
123+
break
124+
125+
df = pd.read_excel(
126+
self.question_file,
127+
sheet_name=sheet_name,
128+
header=header,
56129
usecols=lambda name: name != "Notes" and "Unnamed" not in name,
57130
)
58131

59132
df = df.dropna(axis="index", how="all")
133+
drop_cols = [
134+
"Climate Justice/Adaptation Tag",
135+
"Drop down box options for no mark awarded (internal)",
136+
"Is this question or criteria changing?",
137+
"Change proposed",
138+
"New Criteria",
139+
"Clarifications",
140+
"2023 Scorecards Criteria",
141+
"2023 Scorecards Clarifications",
142+
"2023 Criteria",
143+
"2023 Clarifications",
144+
"Previous Criteria from 2023 Scorecards",
145+
"Type",
146+
"Edits",
147+
"Total Points Available when weighted",
148+
"Weighting",
149+
]
150+
for col in drop_cols:
151+
if col in df.columns:
152+
df = df.drop(col, axis=1)
60153

61154
columns = list(self.column_names)
62155
options = len(df.columns) - len(self.column_names) + 1
@@ -69,6 +162,9 @@ def handle(self, quiet: bool = False, *args, **kwargs):
69162
if pd.isna(row["question_no"]):
70163
continue
71164

165+
if pd.isna(row["question"]):
166+
continue
167+
72168
q_no = str(row["question_no"])
73169
q_part = None
74170
if pd.isna(q_no):
@@ -86,7 +182,10 @@ def handle(self, quiet: bool = False, *args, **kwargs):
86182
if row["how_marked"] == "FOI":
87183
how_marked = "foi"
88184
question_type = "foi"
89-
elif "National Data" in row["how_marked"]:
185+
elif (
186+
"National Data" in row["how_marked"]
187+
or "National data" in row["how_marked"]
188+
):
90189
how_marked = "national_data"
91190
question_type = "national_data"
92191

@@ -105,7 +204,7 @@ def handle(self, quiet: bool = False, *args, **kwargs):
105204
pass
106205
else:
107206
print(
108-
f"missing question type: {title}, {row['question_no']} - {row['question_type']}"
207+
f"missing question type: {section.title}, {row['question_no']} - {row['question_type']}"
109208
)
110209
continue
111210

0 commit comments

Comments
 (0)