From d5f934bb924907bf2f28d6174c1f8be80f5194f8 Mon Sep 17 00:00:00 2001 From: Jon Massey Date: Fri, 14 Feb 2025 12:46:10 +0000 Subject: [PATCH] Replace user-provided freetext Users may place personal information in freetext fields. Remove this in case they have. --- deploy/bin/sanitise_backup.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/deploy/bin/sanitise_backup.py b/deploy/bin/sanitise_backup.py index db146e64..df7a191e 100644 --- a/deploy/bin/sanitise_backup.py +++ b/deploy/bin/sanitise_backup.py @@ -20,6 +20,12 @@ def hash_password(password, salt=None, iterations=2000): b64_hash = base64.b64encode(pw_hash).decode("ascii").strip() return "{}${}${}${}".format(ALGORITHM, iterations, salt, b64_hash) +def extract_text_column_names(sql): + column_ddl_start = sql.find("(") + column_ddl_end = sql.rfind(")") + column_ddl = sql[column_ddl_start+1:column_ddl_end] + text_columns = [c.strip() for c in column_ddl.split(",") if " text " in c] + return [c.split(" ")[0].replace('"',"") for c in text_columns] def main(backup_file): fake = faker.Faker() @@ -63,6 +69,17 @@ def main(backup_file): stmts.append("COMMIT TRANSACTION") conn.executescript(";\n".join(stmts)+";\n") + + + cur = conn.execute("SELECT name, sql FROM sqlite_schema WHERE type ='table' AND name NOT LIKE 'sqlite_%' AND name NOT LIKE 'django_%' AND name NOT LIKE 'versioning%' AND sql LIKE '% text %';") + freetext_tables = [(t[0],extract_text_column_names(t[1])) for t in cur.fetchall()] + for table, columns in freetext_tables: + for column in columns: + if "data" in column: + continue + stmt = f"UPDATE {table} SET {column} = '[freetext removed]' WHERE COALESCE({column},'') <> '';" + conn.execute(stmt) + conn.commit() conn.close()