Skip to content

Commit

Permalink
Add salt to hashing logic (#194)
Browse files Browse the repository at this point in the history
* Add salt to hashing logic

* Use salt globally

* Move salt config on higher level in strategy file

* Update Readme file
  • Loading branch information
aishwaryavora authored Mar 4, 2025
1 parent 4ee539e commit 06462aa
Show file tree
Hide file tree
Showing 9 changed files with 486 additions and 33 deletions.
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,24 @@ When using deterministic mode:
- Different IDs will generate different names, even for the same input value

This is useful when you need consistent fake names across multiple database dumps or when maintaining referential integrity between tables.

## Global Salt

The anonymiser supports using a global salt for consistent hashing across different runs. To use this feature, add a salt configuration as the first item in your strategy.json file:

```json
[
{
"salt": "your-global-salt-here"
},
{
"table_name": "public.users",
"description": "",
"columns": [
// ... columns configuration ...
]
}
]
```

The salt will be applied to all transformers that support salted hashing (marked with † in the transformer list). Different salt values will generate different outputs for the same input.
7 changes: 7 additions & 0 deletions src/fixers/db_mismatch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ fn add_missing(current: Vec<StrategyInFile>, missing: &[SimpleColumn]) -> Vec<St
table_name: table.clone(),
description: "".to_string(),
columns: vec![],
salt: None,
};
for column in missing_columns {
new_table.columns.push(ColumnInFile::new(&column));
Expand Down Expand Up @@ -102,6 +103,7 @@ mod tests {
description: "".to_string(),
truncate: false,
columns: vec![ColumnInFile::new("id"), ColumnInFile::new("first_name")],
salt: None,
}];

let missing = vec![
Expand Down Expand Up @@ -131,12 +133,14 @@ mod tests {
ColumnInFile::new("first_name"),
ColumnInFile::new("last_name"),
],
salt: None,
},
StrategyInFile {
table_name: "public.location".to_string(),
description: "".to_string(),
truncate: false,
columns: vec![ColumnInFile::new("id"), ColumnInFile::new("post_code")],
salt: None,
},
];

Expand All @@ -151,6 +155,7 @@ mod tests {
description: "".to_string(),
truncate: false,
columns: vec![ColumnInFile::new("id"), ColumnInFile::new("post_code")],
salt: None,
},
StrategyInFile {
table_name: "public.person".to_string(),
Expand All @@ -161,6 +166,7 @@ mod tests {
ColumnInFile::new("first_name"),
ColumnInFile::new("last_name"),
],
salt: None,
},
];

Expand All @@ -186,6 +192,7 @@ mod tests {
description: "".to_string(),
truncate: false,
columns: vec![ColumnInFile::new("id"), ColumnInFile::new("first_name")],
salt: None,
}];

assert_eq!(result, expected);
Expand Down
21 changes: 18 additions & 3 deletions src/parsers/copy_row.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use regex::Regex;
/// Transformation settings resolved for the table named in a `COPY` row,
/// as returned by `parse`.
pub struct CurrentTableTransforms {
/// Name of the table the current `COPY` block targets (e.g. `public.users`).
pub table_name: String,
/// How rows of this table are handled, as produced by `table_strategy`.
pub table_transformers: TableTransformers,
/// Salt looked up via `Strategies::salt_for_table` for this table and passed
/// on to the column transformers; `None` when no salt is configured.
pub salt: Option<String>,
}

#[derive(Clone, Debug, PartialEq, Eq)]
Expand All @@ -27,9 +28,11 @@ pub fn parse(copy_row: &str, strategies: &Strategies) -> CurrentTableTransforms
let some_table = capture_to_item(&cap, "table");
match (some_table, some_columns) {
(Some(table), Some(unsplit_columns)) => {
get_current_table_information(table, unsplit_columns, strategies)
let mut current_table =
get_current_table_information(table, unsplit_columns, strategies);
current_table.salt = strategies.salt_for_table(table).map(String::from);
current_table
}

(_, _) => panic!("Invalid Copy row format: {:?}", copy_row),
}
} else {
Expand All @@ -48,10 +51,12 @@ fn get_current_table_information(
.map(sanitiser::dequote_column_or_table_name_data)
.collect();
let table_transformers = table_strategy(strategies, &table_name, &column_name_list);
let salt = strategies.salt_for_table(&table_name).map(String::from);

CurrentTableTransforms {
table_name,
table_transformers,
salt,
}
}

Expand Down Expand Up @@ -111,7 +116,14 @@ mod tests {
.iter()
.map(|column| (column.name.clone(), column.clone()))
.collect();
let strategies = Strategies::new_from("public.users".to_string(), column_infos_with_name);

let test_salt = "test_table_salt".to_string();
let strategies = Strategies::new_from_with_salt(
"public.users".to_string(),
column_infos_with_name,
Some(test_salt.clone()),
);

let parsed_copy_row = parse(
"COPY public.users (id, first_name, last_name) FROM stdin;\n",
&strategies,
Expand All @@ -120,13 +132,15 @@ mod tests {
let expected = CurrentTableTransforms {
table_name: "public.users".to_string(),
table_transformers: TableTransformers::ColumnTransformer(columns),
salt: Some(test_salt),
};

assert_eq!(expected.table_name, parsed_copy_row.table_name);
assert_eq!(
expected.table_transformers,
parsed_copy_row.table_transformers
);
assert_eq!(expected.salt, parsed_copy_row.salt);
}

#[test]
Expand All @@ -150,6 +164,7 @@ mod tests {
expected_table_transformers,
parsed_copy_row.table_transformers
);
assert_eq!(None, parsed_copy_row.salt);
}

#[test]
Expand Down
17 changes: 14 additions & 3 deletions src/parsers/row_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,14 @@ fn transform_row(
types: &Types,
) -> String {
match current_table.table_transformers {
TableTransformers::ColumnTransformer(ref columns) => {
transform_row_with_columns(rng, line, &current_table.table_name, columns, types)
}
TableTransformers::ColumnTransformer(ref columns) => transform_row_with_columns(
rng,
line,
&current_table.table_name,
columns,
types,
current_table.salt.as_deref(),
),

TableTransformers::Truncator => "".to_string(),
}
Expand All @@ -118,6 +123,7 @@ fn transform_row_with_columns(
table_name: &str,
columns: &[ColumnInfo],
types: &Types,
salt: Option<&str>,
) -> String {
let column_values: Vec<String> = data_row::split(line).map(|s| s.to_string()).collect();

Expand Down Expand Up @@ -151,6 +157,7 @@ fn transform_row_with_columns(
&current_column.transformer,
table_name,
&column_name_values,
salt,
)
});

Expand Down Expand Up @@ -410,6 +417,7 @@ mod tests {
ColumnInfo::builder().with_name("column_2").build(),
ColumnInfo::builder().with_name("column_3").build(),
]),
salt: None,
},
},
types: Types::builder()
Expand Down Expand Up @@ -455,6 +463,7 @@ mod tests {
)
.build(),
]),
salt: None,
},
},
types: Types::builder()
Expand Down Expand Up @@ -490,6 +499,7 @@ mod tests {
.with_transformer(TransformerType::Identity, None)
.build(),
]),
salt: None,
},
},
types: Types::builder()
Expand Down Expand Up @@ -518,6 +528,7 @@ mod tests {
.with_transformer(TransformerType::Scramble, None)
.build(),
]),
salt: None,
},
},
types: Types::builder()
Expand Down
1 change: 1 addition & 0 deletions src/parsers/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ mod tests {
current_table: CurrentTableTransforms {
table_name: "table-mc-tableface".to_string(),
table_transformers: TableTransformers::ColumnTransformer(vec![]),
salt: None,
},
};

Expand Down
Loading

0 comments on commit 06462aa

Please sign in to comment.