Skip to content

Commit

Permalink
Add truncate option (#163)
Browse files Browse the repository at this point in the history
  • Loading branch information
danturn authored Nov 20, 2024
1 parent 01687e4 commit 1a4ea33
Show file tree
Hide file tree
Showing 14 changed files with 370 additions and 133 deletions.
18 changes: 17 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,23 @@ The following data categories are supported
- Unknown - Unclassified. If any field has this category, anonymisation will fail until it is replaced with a valid type


## Transformers
## Data transformation

Table data can be transformed in one of two ways:
1. Truncating the table
To use this option, define the table in the strategy file with the `truncate` key set to `true` and the `columns` key set to an empty array, e.g.
```
{
"table_name": "public.truncatable_table",
"description": "",
"truncate": true,
"columns": []
},
```

2. Transform the data in the table
Transforming table data requires a list of all table columns, with a transformer defined for each and every column. (Note that for data that is neither PII nor otherwise sensitive, you can use the `Identity` transformer to leave the data untransformed.)

- EmptyJson - Literally `{}`
- Error - Not set. If any field has this transformer, anonymisation will fail until it is replaced with a valid transformer
- FakeBase16String - Random Base16 string
Expand Down
56 changes: 53 additions & 3 deletions src/anonymiser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,43 @@ mod tests {

/// End-to-end check: anonymise the fixture dump with the fixture strategy file,
/// then restore the produced SQL into a freshly created database with `psql`
/// and require the restore to exit successfully.
#[test]
fn successfully_transforms() {
    let output_path = "test_files/results_successfully_transforms.sql";
    let server_url = "postgresql://postgres:postgres@localhost";

    // Step 1: produce the anonymised dump.
    let anonymised_ok = anonymise(
        "test_files/dump_file.sql".to_string(),
        output_path.to_string(),
        "test_files/strategy.json".to_string(),
        None,
        TransformerOverrides::none(),
    )
    .is_ok();
    assert!(anonymised_ok);

    // Step 2: recreate the target database through the `postgres` database.
    let admin_url = [server_url, "/postgres"].concat();
    let mut admin_conn =
        Client::connect(&admin_url, NoTls).expect("expected connection to succeed");
    for statement in [
        "drop database if exists successfully_transforms_test_db",
        "create database successfully_transforms_test_db",
    ] {
        admin_conn.simple_query(statement).unwrap();
    }

    // Step 3: replay the anonymised dump; ON_ERROR_STOP makes psql exit
    // non-zero on the first failing statement.
    let target_url = format!("{}/successfully_transforms_test_db", server_url);
    let mut psql = Command::new("psql");
    psql.arg(target_url)
        .args(["-f", output_path, "-v", "ON_ERROR_STOP=1"]);
    let restore = psql.output().expect("failed!");

    // stderr is only decoded when the assertion fails.
    assert!(
        restore.status.success(),
        "failed to restore backup:\n{:?}",
        String::from_utf8(restore.stderr).unwrap()
    );
}

#[test]
fn successfully_truncates() {
assert!(anonymise(
"test_files/dump_file.sql".to_string(),
"test_files/results.sql".to_string(),
Expand All @@ -83,13 +120,15 @@ mod tests {
let postgres = format!("{}/postgres", db_url);
let mut conn = Client::connect(&postgres, NoTls).expect("expected connection to succeed");

conn.simple_query("drop database if exists anonymiser_test")
conn.simple_query("drop database if exists successfully_truncates_db_name")
.unwrap();
conn.simple_query("create database anonymiser_test")
conn.simple_query("create database successfully_truncates_db_name")
.unwrap();

conn.close().expect("expected connection to close");

let result = Command::new("psql")
.arg(format!("{}/anonymiser_test", db_url))
.arg(format!("{}/successfully_truncates_db_name", db_url))
.arg("-f")
.arg("test_files/results.sql")
.arg("-v")
Expand All @@ -102,5 +141,16 @@ mod tests {
"failed to restore backup:\n{:?}",
String::from_utf8(result.stderr).unwrap()
);

let test_db = format!("{}/successfully_truncates_db_name", db_url);
let mut test_db_conn =
Client::connect(&test_db, NoTls).expect("expected connection to succeed");

let extra_data_row_count: i64 = test_db_conn
.query_one("select count(*) from extra_data", &[])
.unwrap()
.get(0);

assert_eq!(extra_data_row_count, 0);
}
}
11 changes: 9 additions & 2 deletions src/file_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ mod tests {
strategy_tuple("phone_number"),
]),
);

strategies.insert(
"public.extra_data".to_string(),
HashMap::from([strategy_tuple("id"), strategy_tuple("data")]),
);

strategies
}

Expand All @@ -123,7 +129,8 @@ mod tests {
fn can_read_and_output_compressed_with_default() {
let input_file = "test_files/dump_file.sql".to_string();
let compressed_file = "test_files/compressed_file_reader_test_results.sql".to_string();
let uncompressed_file_name = "test_files/uncompressed_file_reader_test_results.sql";
let uncompressed_file_name =
"test_files/uncompressed_file_reader_can_read_and_output_compressed_with_default.sql";

let _ = fs::remove_file(&compressed_file);
let _ = fs::remove_file(uncompressed_file_name);
Expand Down Expand Up @@ -156,7 +163,7 @@ mod tests {
fn can_read_and_output_compressed_with_specific_compression_type() {
let input_file = "test_files/dump_file.sql".to_string();
let compressed_file = "test_files/compressed_file_reader_test_results.sql".to_string();
let uncompressed_file_name = "test_files/uncompressed_file_reader_test_results.sql";
let uncompressed_file_name = "test_files/uncompressed_file_reader_can_read_and_output_compressed_with_sepcific_compression_type.sql";

let _ = fs::remove_file(&compressed_file);
let _ = fs::remove_file(uncompressed_file_name);
Expand Down
7 changes: 7 additions & 0 deletions src/fixers/db_mismatch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ fn add_missing(current: Vec<StrategyInFile>, missing: &[SimpleColumn]) -> Vec<St
}
None => {
let mut new_table = StrategyInFile {
truncate: false,
table_name: table.clone(),
description: "".to_string(),
columns: vec![],
Expand Down Expand Up @@ -95,6 +96,7 @@ mod tests {
let current = vec![StrategyInFile {
table_name: "public.person".to_string(),
description: "".to_string(),
truncate: false,
columns: vec![ColumnInFile::new("id"), ColumnInFile::new("first_name")],
}];

Expand All @@ -119,6 +121,7 @@ mod tests {
StrategyInFile {
table_name: "public.person".to_string(),
description: "".to_string(),
truncate: false,
columns: vec![
ColumnInFile::new("id"),
ColumnInFile::new("first_name"),
Expand All @@ -128,6 +131,7 @@ mod tests {
StrategyInFile {
table_name: "public.location".to_string(),
description: "".to_string(),
truncate: false,
columns: vec![ColumnInFile::new("id"), ColumnInFile::new("post_code")],
},
];
Expand All @@ -141,11 +145,13 @@ mod tests {
StrategyInFile {
table_name: "public.location".to_string(),
description: "".to_string(),
truncate: false,
columns: vec![ColumnInFile::new("id"), ColumnInFile::new("post_code")],
},
StrategyInFile {
table_name: "public.person".to_string(),
description: "".to_string(),
truncate: false,
columns: vec![
ColumnInFile::new("id"),
ColumnInFile::new("first_name"),
Expand Down Expand Up @@ -174,6 +180,7 @@ mod tests {
let expected = vec![StrategyInFile {
table_name: "public.person".to_string(),
description: "".to_string(),
truncate: false,
columns: vec![ColumnInFile::new("id"), ColumnInFile::new("first_name")],
}];

Expand Down
108 changes: 61 additions & 47 deletions src/parsers/copy_row.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
use crate::parsers::sanitiser;
use crate::parsers::strategies::Strategies;
use crate::parsers::strategies::TableStrategy;
use crate::parsers::strategy_structs::ColumnInfo;
use lazy_static::lazy_static;
use regex::Regex;

/// Outcome of parsing a `COPY` row: the table it targets together with the
/// transformation looked up for that table in the supplied strategies.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CurrentTableTransforms {
/// Table name after `sanitiser::dequote_column_or_table_name_data` has been applied.
pub table_name: String,
// NOTE(review): this listing shows `columns` next to `table_transformers`;
// confirm whether `columns` is still a live field or has been superseded.
pub columns: Vec<ColumnInfo>,
/// What to do with the table's data, as returned by `table_strategy`.
pub table_transformers: TableTransformers,
}

/// How the data of a single table is to be handled.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum TableTransformers {
/// Transform column by column: holds one `ColumnInfo` per column named in
/// the `COPY` column list, in the same order as that list.
ColumnTransformer(Vec<ColumnInfo>),
/// Selected when the table's strategy is `TableStrategy::Truncate`.
/// Presumably the table's rows are omitted from the output; the consumer
/// is not visible here, so confirm against the caller.
Truncator,
}

pub fn parse(copy_row: &str, strategies: &Strategies) -> CurrentTableTransforms {
Expand Down Expand Up @@ -36,35 +43,42 @@ fn get_current_table_information(
strategies: &Strategies,
) -> CurrentTableTransforms {
let table_name = sanitiser::dequote_column_or_table_name_data(table);
let column_list: Vec<String> = unsplit_columns
let column_name_list: Vec<String> = unsplit_columns
.split(", ")
.map(sanitiser::dequote_column_or_table_name_data)
.collect();
let columns = columns_from_strategy(strategies, &table_name, &column_list);
let table_transformers = table_strategy(strategies, &table_name, &column_name_list);

CurrentTableTransforms {
table_name,
columns,
table_transformers,
}
}

fn columns_from_strategy(
fn table_strategy(
strategies: &Strategies,
table_name: &str,
column_list: &[String],
) -> Vec<ColumnInfo> {
match strategies.for_table(table_name) {
Some(columns) => column_list
.iter()
.map(|c| match columns.get(c) {
Some(column_info) => column_info.clone(),
None => panic!(
"No transform found for column: {:?} in table: {:?}",
c, table_name
),
})
.collect(),
_ => panic!("No transforms found for table: {:?}", table_name),
column_name_list: &[String],
) -> TableTransformers {
let strategies_for_table = strategies.for_table(table_name);

match strategies_for_table {
Some(TableStrategy::Columns(columns_with_names)) => {
let column_infos = column_name_list
.iter()
.map(|column_name| match columns_with_names.get(column_name) {
Some(column_info) => column_info.clone(),
None => panic!(
"No transform found for column: {:?} in table: {:?}",
column_name, table_name
),
})
.collect();
TableTransformers::ColumnTransformer(column_infos)
}

Some(TableStrategy::Truncate) => TableTransformers::Truncator,
None => panic!("No transforms found for table: {:?}", table_name),
}
}

Expand All @@ -82,48 +96,42 @@ mod tests {

#[test]
fn returns_transforms_for_table() {
let column_infos = HashMap::from([
("id".to_string(), ColumnInfo::builder().build()),
(
"first_name".to_string(),
ColumnInfo::builder()
.with_transformer(TransformerType::FakeFirstName, None)
.build(),
),
(
"last_name".to_string(),
ColumnInfo::builder()
.with_transformer(TransformerType::FakeLastName, None)
.build(),
),
]);
let strategies = Strategies::new_from("public.users".to_string(), column_infos);
let columns = vec![
ColumnInfo::builder().with_name("id").build(),
ColumnInfo::builder()
.with_transformer(TransformerType::FakeFirstName, None)
.with_name("first_name")
.build(),
ColumnInfo::builder()
.with_transformer(TransformerType::FakeLastName, None)
.with_name("last_name")
.build(),
];
let column_infos_with_name: HashMap<String, ColumnInfo> = columns
.iter()
.map(|column| (column.name.clone(), column.clone()))
.collect();
let strategies = Strategies::new_from("public.users".to_string(), column_infos_with_name);
let parsed_copy_row = parse(
"COPY public.users (id, first_name, last_name) FROM stdin;\n",
&strategies,
);

let expected = CurrentTableTransforms {
table_name: "public.users".to_string(),
columns: vec![
ColumnInfo::builder().build(),
ColumnInfo::builder()
.with_transformer(TransformerType::FakeFirstName, None)
.build(),
ColumnInfo::builder()
.with_transformer(TransformerType::FakeLastName, None)
.build(),
],
table_transformers: TableTransformers::ColumnTransformer(columns),
};

assert_eq!(expected.table_name, parsed_copy_row.table_name);
assert_eq!(expected.columns, parsed_copy_row.columns);
assert_eq!(
expected.table_transformers,
parsed_copy_row.table_transformers
);
}

#[test]
fn removes_quotes_around_table_and_column_names() {
let expected_column = ColumnInfo::builder().with_name("from").build();

let strategies = Strategies::new_from(
"public.references".to_string(),
HashMap::from([("from".to_string(), expected_column.clone())]),
Expand All @@ -134,8 +142,14 @@ mod tests {
&strategies,
);

let expected_table_transformers =
TableTransformers::ColumnTransformer(vec![expected_column]);

assert_eq!("public.references", parsed_copy_row.table_name);
assert_eq!(vec![expected_column], parsed_copy_row.columns);
assert_eq!(
expected_table_transformers,
parsed_copy_row.table_transformers
);
}

#[test]
Expand Down
Loading

0 comments on commit 1a4ea33

Please sign in to comment.