From 06462aa31a00b66ba4126882bd0d6525652474e4 Mon Sep 17 00:00:00 2001 From: Aishwarya Vora Date: Tue, 4 Mar 2025 14:04:51 +0000 Subject: [PATCH] Add salt to hashing logic (#194) * Add salt to hashing logic * Use salt globally * Move salt config on higher level in strategy file * Update Readme file --- README.md | 21 ++ src/fixers/db_mismatch.rs | 7 + src/parsers/copy_row.rs | 21 +- src/parsers/row_parser.rs | 17 +- src/parsers/state.rs | 1 + src/parsers/strategies.rs | 101 +++++++++- src/parsers/strategy_structs.rs | 12 ++ src/parsers/transformer.rs | 337 +++++++++++++++++++++++++++++--- src/test_builders.rs | 2 + 9 files changed, 486 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index b3a9887..0f2ba6b 100644 --- a/README.md +++ b/README.md @@ -147,3 +147,24 @@ When using deterministic mode: - Different IDs will generate different names, even for the same input value This is useful when you need consistent fake names across multiple database dumps or when maintaining referential integrity between tables. + +## Global Salt + +The anonymiser supports using a global salt for consistent hashing across different runs. To use this feature, add a salt configuration as the first item in your strategy.json file: + +```json +[ + { + "salt": "your-global-salt-here" + }, + { + "table_name": "public.users", + "description": "", + "columns": [ + // ... columns configuration ... + ] + } +] +``` + +The salt will be applied to all transformers that support salted hashing (marked with † in the transformer list). 
Different salt values will generate different outputs for the same input. \ No newline at end of file diff --git a/src/fixers/db_mismatch.rs b/src/fixers/db_mismatch.rs index 23d739f..2eb07a0 100644 --- a/src/fixers/db_mismatch.rs +++ b/src/fixers/db_mismatch.rs @@ -39,6 +39,7 @@ fn add_missing(current: Vec, missing: &[SimpleColumn]) -> Vec, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -27,9 +28,11 @@ pub fn parse(copy_row: &str, strategies: &Strategies) -> CurrentTableTransforms let some_table = capture_to_item(&cap, "table"); match (some_table, some_columns) { (Some(table), Some(unsplit_columns)) => { - get_current_table_information(table, unsplit_columns, strategies) + let mut current_table = + get_current_table_information(table, unsplit_columns, strategies); + current_table.salt = strategies.salt_for_table(table).map(String::from); + current_table } - (_, _) => panic!("Invalid Copy row format: {:?}", copy_row), } } else { @@ -48,10 +51,12 @@ fn get_current_table_information( .map(sanitiser::dequote_column_or_table_name_data) .collect(); let table_transformers = table_strategy(strategies, &table_name, &column_name_list); + let salt = strategies.salt_for_table(&table_name).map(String::from); CurrentTableTransforms { table_name, table_transformers, + salt, } } @@ -111,7 +116,14 @@ mod tests { .iter() .map(|column| (column.name.clone(), column.clone())) .collect(); - let strategies = Strategies::new_from("public.users".to_string(), column_infos_with_name); + + let test_salt = "test_table_salt".to_string(); + let strategies = Strategies::new_from_with_salt( + "public.users".to_string(), + column_infos_with_name, + Some(test_salt.clone()), + ); + let parsed_copy_row = parse( "COPY public.users (id, first_name, last_name) FROM stdin;\n", &strategies, @@ -120,6 +132,7 @@ mod tests { let expected = CurrentTableTransforms { table_name: "public.users".to_string(), table_transformers: TableTransformers::ColumnTransformer(columns), + salt: Some(test_salt), }; 
assert_eq!(expected.table_name, parsed_copy_row.table_name); @@ -127,6 +140,7 @@ mod tests { expected.table_transformers, parsed_copy_row.table_transformers ); + assert_eq!(expected.salt, parsed_copy_row.salt); } #[test] @@ -150,6 +164,7 @@ mod tests { expected_table_transformers, parsed_copy_row.table_transformers ); + assert_eq!(None, parsed_copy_row.salt); } #[test] diff --git a/src/parsers/row_parser.rs b/src/parsers/row_parser.rs index b8227f3..5f8a1cc 100644 --- a/src/parsers/row_parser.rs +++ b/src/parsers/row_parser.rs @@ -104,9 +104,14 @@ fn transform_row( types: &Types, ) -> String { match current_table.table_transformers { - TableTransformers::ColumnTransformer(ref columns) => { - transform_row_with_columns(rng, line, ¤t_table.table_name, columns, types) - } + TableTransformers::ColumnTransformer(ref columns) => transform_row_with_columns( + rng, + line, + ¤t_table.table_name, + columns, + types, + current_table.salt.as_deref(), + ), TableTransformers::Truncator => "".to_string(), } @@ -118,6 +123,7 @@ fn transform_row_with_columns( table_name: &str, columns: &[ColumnInfo], types: &Types, + salt: Option<&str>, ) -> String { let column_values: Vec = data_row::split(line).map(|s| s.to_string()).collect(); @@ -151,6 +157,7 @@ fn transform_row_with_columns( ¤t_column.transformer, table_name, &column_name_values, + salt, ) }); @@ -410,6 +417,7 @@ mod tests { ColumnInfo::builder().with_name("column_2").build(), ColumnInfo::builder().with_name("column_3").build(), ]), + salt: None, }, }, types: Types::builder() @@ -455,6 +463,7 @@ mod tests { ) .build(), ]), + salt: None, }, }, types: Types::builder() @@ -490,6 +499,7 @@ mod tests { .with_transformer(TransformerType::Identity, None) .build(), ]), + salt: None, }, }, types: Types::builder() @@ -518,6 +528,7 @@ mod tests { .with_transformer(TransformerType::Scramble, None) .build(), ]), + salt: None, }, }, types: Types::builder() diff --git a/src/parsers/state.rs b/src/parsers/state.rs index c92a67e..a4b87d2 
100644 --- a/src/parsers/state.rs +++ b/src/parsers/state.rs @@ -96,6 +96,7 @@ mod tests { current_table: CurrentTableTransforms { table_name: "table-mc-tableface".to_string(), table_transformers: TableTransformers::ColumnTransformer(vec![]), + salt: None, }, }; diff --git a/src/parsers/strategies.rs b/src/parsers/strategies.rs index 0253572..ea51257 100644 --- a/src/parsers/strategies.rs +++ b/src/parsers/strategies.rs @@ -9,6 +9,7 @@ type ColumnNamesToInfo = HashMap; #[derive(Debug, PartialEq, Eq)] pub struct Strategies { tables: HashMap, + salt: Option, } #[derive(Debug, Clone, Eq, PartialEq)] @@ -21,6 +22,7 @@ impl Strategies { pub fn new() -> Strategies { Strategies { tables: HashMap::new(), + salt: None, } } @@ -31,7 +33,20 @@ impl Strategies { let mut transformed_strategies = Strategies::new(); let mut errors = ValidationErrors::new(); + // Check if the first item is a salt configuration + if let Some(first) = strategies_in_file.first() { + transformed_strategies.salt = if first.table_name.is_empty() { + first.salt.clone() + } else { + None + }; + } + for strategy in strategies_in_file { + if strategy.table_name.is_empty() { + continue; + } + // Validate deterministic settings for column in &strategy.columns { if let Some(args) = &column.transformer.args { @@ -184,6 +199,27 @@ impl Strategies { pub fn new_from(table_name: String, columns: HashMap) -> Strategies { Strategies { tables: HashMap::from([(table_name, TableStrategy::Columns(columns))]), + salt: None, + } + } + + #[allow(dead_code)] //This is used in tests for convenience + pub fn new_from_with_salt( + table_name: String, + columns: HashMap, + salt: Option, + ) -> Strategies { + Strategies { + tables: HashMap::from([(table_name, TableStrategy::Columns(columns))]), + salt, + } + } + + pub fn salt_for_table(&self, table_name: &str) -> Option<&str> { + if self.tables.contains_key(table_name) { + self.salt.as_deref() + } else { + None } } } @@ -240,6 +276,7 @@ mod tests { &mut strategies, 
"public.location", [create_column("postcode")].into_iter(), + None, ); let columns_from_db = HashSet::from([ @@ -281,6 +318,7 @@ mod tests { &mut strategies, "public.location", [create_column("postcode")].into_iter(), + None, ); let columns_from_db = HashSet::from([create_simple_column("public.person", "first_name")]); @@ -351,6 +389,7 @@ mod tests { table_name: TABLE_NAME.to_string(), description: "description".to_string(), truncate: false, + salt: None, columns: vec![column_in_file( DataCategory::Pii, column_name, @@ -374,6 +413,51 @@ mod tests { assert_eq!(expected, parsed); } + #[test] + fn from_strategies_in_file_can_parse_file_contents_with_salt_into_hashmaps() { + let column_name = "column1"; + let salt = "test_salt".to_string(); + + let strategies = vec![ + // First item is salt configuration (matches JSON structure) + StrategyInFile { + table_name: String::default(), // Will be empty string + description: String::default(), + truncate: false, + salt: Some(salt.clone()), + columns: Vec::default(), + }, + // Actual table strategy + StrategyInFile { + table_name: TABLE_NAME.to_string(), + description: "description".to_string(), + truncate: false, + salt: None, + columns: vec![column_in_file( + DataCategory::Pii, + column_name, + TransformerType::Scramble, + )], + }, + ]; + + let expected = Strategies::new_from_with_salt( + TABLE_NAME.to_string(), + HashMap::from([( + column_name.to_string(), + ColumnInfo::builder() + .with_name(column_name) + .with_data_category(DataCategory::Pii) + .with_transformer(TransformerType::Scramble, None) + .build(), + )]), + Some(salt), + ); + let parsed = Strategies::from_strategies_in_file(strategies, &TransformerOverrides::none()) + .expect("we shouldnt have duplicate columns!"); + assert_eq!(expected, parsed); + } + #[test] fn from_strategies_in_file_returns_errors_for_duplicate_table_and_column_definitions() { let table2_name = "daps"; @@ -386,18 +470,21 @@ mod tests { table_name: TABLE_NAME.to_string(), description: 
"description".to_string(), truncate: false, + salt: None, columns: vec![], }, StrategyInFile { table_name: TABLE_NAME.to_string(), description: "description".to_string(), truncate: false, + salt: None, columns: vec![], }, StrategyInFile { table_name: table2_name.to_string(), description: "description".to_string(), truncate: false, + salt: None, columns: vec![duplicated_column.clone(), duplicated_column], }, ]; @@ -418,6 +505,7 @@ mod tests { table_name: "public.person".to_string(), description: "description".to_string(), truncate: false, + salt: None, columns: vec![column_in_file( DataCategory::Unknown, "first_name", @@ -440,6 +528,7 @@ mod tests { table_name: "public.person".to_string(), description: "description".to_string(), truncate: false, + salt: None, columns: vec![column_in_file( DataCategory::General, "first_name", @@ -462,6 +551,7 @@ mod tests { table_name: "public.person".to_string(), description: "description".to_string(), truncate: false, + salt: None, columns: vec![ column_in_file(DataCategory::Pii, "first_name", TransformerType::Identity), column_in_file( @@ -491,6 +581,7 @@ mod tests { table_name: TABLE_NAME.to_string(), description: "description".to_string(), truncate: false, + salt: None, columns: vec![ column_in_file( DataCategory::PotentialPii, @@ -534,6 +625,7 @@ mod tests { table_name: TABLE_NAME.to_string(), description: "description".to_string(), truncate: false, + salt: None, columns: vec![ column_in_file( DataCategory::PotentialPii, @@ -578,6 +670,7 @@ mod tests { table_name: TABLE_NAME.to_string(), description: "description".to_string(), truncate: false, + salt: None, columns: vec![column_in_file( DataCategory::General, SCRAMBLED_COLUMN_NAME, @@ -605,6 +698,7 @@ mod tests { table_name: TABLE_NAME.to_string(), description: "description".to_string(), truncate: false, + salt: None, columns: vec![ column_in_file( DataCategory::PotentialPii, @@ -658,6 +752,7 @@ mod tests { table_name: "public.person".to_string(), description: 
"description".to_string(), truncate: false, + salt: None, columns: vec![ColumnInFile { data_category: DataCategory::General, description: "first_name".to_string(), @@ -696,6 +791,7 @@ mod tests { table_name: "public.person".to_string(), description: "description".to_string(), truncate: false, + salt: None, columns: vec![ColumnInFile { data_category: DataCategory::General, description: "first_name".to_string(), @@ -736,15 +832,16 @@ mod tests { I: Iterator, { let mut strategies = Strategies::new(); - add_table(&mut strategies, table_name, columns); + add_table(&mut strategies, table_name, columns, None); strategies } - fn add_table(strategies: &mut Strategies, table_name: &str, columns: I) + fn add_table(strategies: &mut Strategies, table_name: &str, columns: I, salt: Option) where I: Iterator, { strategies.insert(table_name.to_string(), HashMap::from_iter(columns)); + strategies.salt = salt; } fn create_column(column_name: &str) -> (String, ColumnInfo) { diff --git a/src/parsers/strategy_structs.rs b/src/parsers/strategy_structs.rs index 76098cc..dfadea9 100644 --- a/src/parsers/strategy_structs.rs +++ b/src/parsers/strategy_structs.rs @@ -45,12 +45,19 @@ impl PartialEq for ColumnInFile { #[derive(Clone, Debug, Eq, Serialize, Deserialize)] pub struct StrategyInFile { + #[serde(default)] pub table_name: String, + + #[serde(default)] pub description: String, #[serde(default)] pub truncate: bool, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub salt: Option, + + #[serde(default)] pub columns: Vec, } @@ -164,3 +171,8 @@ impl Default for TransformerOverrides { Self::none() } } + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct SaltConfig { + pub salt: String, +} diff --git a/src/parsers/transformer.rs b/src/parsers/transformer.rs index 75efd27..8c157ab 100644 --- a/src/parsers/transformer.rs +++ b/src/parsers/transformer.rs @@ -28,11 +28,13 @@ fn get_unique() -> usize { UNIQUE_INTEGER.fetch_add(1, Ordering::SeqCst) } -fn 
get_faker_rng(value: &str, id: Option<&str>) -> SmallRng { +fn get_faker_rng(value: &str, id: Option<&str>, salt: Option<&str>) -> SmallRng { let mut hasher = Sha256::new(); - let combined = match id { - Some(id) => format!("{}{}", value, id), - None => value.to_string(), + let combined = match (id, salt) { + (Some(id), Some(salt)) => format!("{}{}{}", value, id, salt), + (Some(id), None) => format!("{}{}", value, id), + (None, Some(salt)) => format!("{}{}", value, salt), + (None, None) => value.to_string(), }; hasher.update(combined.as_bytes()); let seed = u64::from_le_bytes(hasher.finalize()[..8].try_into().unwrap()); @@ -46,6 +48,7 @@ pub fn transform<'line>( transformer: &'line Transformer, table_name: &str, column_values: &[(String, String)], + global_salt: Option<&str>, ) -> Cow<'line, str> { if ["\\N", "deleted"].contains(&value) { return Cow::from(value); @@ -66,6 +69,7 @@ pub fn transform<'line>( transformer, table_name, column_values, + global_salt, ); } @@ -92,18 +96,32 @@ pub fn transform<'line>( TransformerType::FakeBase16String => Cow::from(fake_base16_string()), TransformerType::FakeBase32String => Cow::from(fake_base32_string()), TransformerType::FakeCity => Cow::from(CityName().fake::()), - TransformerType::FakeCompanyName => { - Cow::from(fake_company_name(value, &transformer.args, unique)) + TransformerType::FakeCompanyName => Cow::from(fake_company_name( + value, + &transformer.args, + unique, + global_salt, + )), + TransformerType::FakeEmail => { + Cow::from(fake_email(value, &transformer.args, unique, global_salt)) } - TransformerType::FakeEmail => Cow::from(fake_email(value, &transformer.args, unique)), - TransformerType::FakeEmailOrPhone => { - Cow::from(fake_email_or_phone(value, &transformer.args, unique)) + TransformerType::FakeEmailOrPhone => Cow::from(fake_email_or_phone( + value, + &transformer.args, + unique, + global_salt, + )), + TransformerType::FakeFirstName => { + Cow::from(fake_first_name(value, &transformer.args, id, 
global_salt)) } - TransformerType::FakeFirstName => Cow::from(fake_first_name(value, &transformer.args, id)), TransformerType::FakeFullAddress => Cow::from(fake_full_address()), - TransformerType::FakeFullName => Cow::from(fake_full_name(value, &transformer.args, id)), + TransformerType::FakeFullName => { + Cow::from(fake_full_name(value, &transformer.args, id, global_salt)) + } TransformerType::FakeIPv4 => Cow::from(IPv4().fake::()), - TransformerType::FakeLastName => Cow::from(fake_last_name(value, &transformer.args, id)), + TransformerType::FakeLastName => { + Cow::from(fake_last_name(value, &transformer.args, id, global_salt)) + } TransformerType::FakeNationalIdentityNumber => Cow::from(fake_national_identity_number()), TransformerType::FakePostCode => Cow::from(fake_postcode(value)), TransformerType::FakePhoneNumber => Cow::from(fake_phone_number(value)), @@ -127,6 +145,7 @@ fn transform_array<'value>( transformer: &Transformer, table_name: &str, column_values: &[(String, String)], + global_salt: Option<&str>, ) -> Cow<'value, str> { let quoted_types = [SubType::Character, SubType::Json]; let requires_quotes = quoted_types.contains(underlying_type); @@ -143,6 +162,7 @@ fn transform_array<'value>( transformer, table_name, column_values, + global_salt, ) } else { let unsplit_array = &value[1..value.len() - 1]; @@ -156,6 +176,7 @@ fn transform_array<'value>( transformer, table_name, column_values, + global_salt, ) }) .collect::>>() @@ -171,6 +192,7 @@ fn transform_quoted_array( transformer: &Transformer, table_name: &str, column_values: &[(String, String)], + global_salt: Option<&str>, ) -> String { let mut inside_word = false; let mut word_is_quoted = false; @@ -202,6 +224,7 @@ fn transform_quoted_array( transformer, table_name, column_values, + global_salt, ); write!(word_acc, "\"{}\",", &transformed) .expect("Should be able to apppend to word_acc"); @@ -231,7 +254,7 @@ fn transform_quoted_array( fn is_deterministic(args: &Option>) -> bool { args.as_ref() 
.and_then(|args| args.get("deterministic")) - .map_or(false, |val| val == "true") + .is_some_and(|val| val == "true") } fn prepend_unique_if_present( @@ -265,14 +288,24 @@ fn fake_base32_string() -> String { base32::encode(Alphabet::Rfc4648 { padding: true }, &random_bytes) } -fn fake_company_name(value: &str, args: &Option>, unique: usize) -> String { - let mut seeded_rng = get_faker_rng(value, None); +fn fake_company_name( + value: &str, + args: &Option>, + unique: usize, + global_salt: Option<&str>, +) -> String { + let mut seeded_rng = get_faker_rng(value, None, global_salt); let new_company_name = CompanyName().fake_with_rng::(&mut seeded_rng); prepend_unique_if_present(new_company_name, args, unique) } -fn fake_email(value: &str, args: &Option>, unique: usize) -> String { - let mut seeded_rng = get_faker_rng(value, None); +fn fake_email( + value: &str, + args: &Option>, + unique: usize, + global_salt: Option<&str>, +) -> String { + let mut seeded_rng = get_faker_rng(value, None, global_salt); let new_email = FreeEmail().fake_with_rng::(&mut seeded_rng); prepend_unique_if_present(new_email, args, unique) } @@ -281,11 +314,12 @@ fn fake_email_or_phone( current_value: &str, optional_args: &Option>, unique: usize, + global_salt: Option<&str>, ) -> String { if current_value.starts_with('+') && !current_value.contains('@') { fake_phone_number(current_value) } else { - fake_email(current_value, optional_args, unique) + fake_email(current_value, optional_args, unique, global_salt) } } @@ -306,38 +340,49 @@ fn fake_first_name( value: &str, args: &Option>, id: Option<&str>, + global_salt: Option<&str>, ) -> String { let deterministic = is_deterministic(args); let id_to_use = if deterministic { id } else { None }; match id_to_use { Some(id) => { - let mut seeded_rng = get_faker_rng(value, Some(id)); + let mut seeded_rng = get_faker_rng(value, Some(id), global_salt); FirstName().fake_with_rng::(&mut seeded_rng) } None => FirstName().fake::(), } } -fn 
fake_last_name(value: &str, args: &Option>, id: Option<&str>) -> String { +fn fake_last_name( + value: &str, + args: &Option>, + id: Option<&str>, + global_salt: Option<&str>, +) -> String { let deterministic = is_deterministic(args); let id_to_use = if deterministic { id } else { None }; match id_to_use { Some(id) => { - let mut seeded_rng = get_faker_rng(value, Some(id)); + let mut seeded_rng = get_faker_rng(value, Some(id), global_salt); LastName().fake_with_rng::(&mut seeded_rng) } None => LastName().fake::(), } } -fn fake_full_name(value: &str, args: &Option>, id: Option<&str>) -> String { +fn fake_full_name( + value: &str, + args: &Option>, + id: Option<&str>, + global_salt: Option<&str>, +) -> String { let deterministic = is_deterministic(args); let id_to_use = if deterministic { id } else { None }; - let first = fake_first_name(&format!("{}_first", value), args, id_to_use); - let last = fake_last_name(&format!("{}_last", value), args, id_to_use); + let first = fake_first_name(&format!("{}_first", value), args, id_to_use, global_salt); + let last = fake_last_name(&format!("{}_last", value), args, id_to_use, global_salt); format!("{} {}", first, last) } @@ -480,6 +525,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert_eq!(new_null, null); } @@ -500,6 +546,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert_eq!(new_deleted, deleted); } @@ -520,6 +567,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_first_name == first_name); } @@ -540,6 +588,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_verification_key != verification_key); assert_eq!(new_verification_key.len(), 32); @@ -561,6 +610,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_verification_key != verification_key); assert_eq!(new_verification_key.len(), 32); @@ -583,6 +633,7 @@ mod tests { &transformer, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_company_name != company_name); @@ -595,6 +646,7 @@ mod tests { 
&transformer, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert_eq!( new_company_name, repeat_company_name, @@ -610,6 +662,7 @@ mod tests { &transformer, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert_ne!( new_company_name, different_company_name, @@ -634,6 +687,7 @@ mod tests { transformer, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_company_name != company_name); let re = Regex::new(r"^[0-9]+-.*").unwrap(); @@ -644,6 +698,67 @@ mod tests { ); } + #[test] + fn fake_company_name_with_salt() { + let company_name = "Acme Inc"; + let mut rng = rng::get(); + + // Create transformer + let transformer = Transformer { + name: TransformerType::FakeCompanyName, + args: None, + }; + + // Test with salt + let new_company_name_with_salt = transform( + &mut rng, + company_name, + &Type::SingleValue { + sub_type: SubType::Character, + }, + &transformer, + TABLE_NAME, + EMPTY_COLUMNS, + Some("test_salt"), + ); + + // Test without salt + let new_company_name_without_salt = transform( + &mut rng, + company_name, + &Type::SingleValue { + sub_type: SubType::Character, + }, + &transformer, + TABLE_NAME, + EMPTY_COLUMNS, + None, + ); + + assert_ne!( + new_company_name_with_salt, new_company_name_without_salt, + "Same input with and without salt should produce different fake company names" + ); + + // Test with different salt + let new_company_name_with_different_salt = transform( + &mut rng, + company_name, + &Type::SingleValue { + sub_type: SubType::Character, + }, + &transformer, + TABLE_NAME, + EMPTY_COLUMNS, + Some("different_salt"), + ); + + assert_ne!( + new_company_name_with_salt, new_company_name_with_different_salt, + "Same input with different salts should produce different fake company names" + ); + } + #[test] fn fake_email() { let email = "any email"; @@ -662,6 +777,7 @@ mod tests { &transformer, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_email != email); @@ -681,6 +797,7 @@ mod tests { &transformer, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert_eq!( new_email, 
repeat_email, @@ -696,6 +813,7 @@ mod tests { &transformer, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert_ne!( new_email, different_email, @@ -720,6 +838,7 @@ mod tests { transformer, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_email != email); let re = Regex::new(r"^[0-9]+-.*@.*\..*").unwrap(); @@ -746,6 +865,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert_ne!(new_first_name, first_name); @@ -776,6 +896,7 @@ mod tests { &transformer, TABLE_NAME, &column_values, + None, ); let repeat_first_name_for_user1 = transform( @@ -787,6 +908,7 @@ mod tests { &transformer, TABLE_NAME, &column_values, + None, ); assert_eq!( @@ -806,6 +928,7 @@ mod tests { &transformer, TABLE_NAME, &column_values_user2, + None, ); assert_ne!( @@ -814,6 +937,72 @@ mod tests { ); } + #[test] + fn fake_first_name_with_salt() { + let first_name = "John"; + let mut rng = rng::get(); + + // Create transformer with deterministic args + let transformer = Transformer { + name: TransformerType::FakeFirstName, + args: Some(HashMap::from([ + ("deterministic".to_string(), "true".to_string()), + ("id_column".to_string(), "user_id".to_string()), + ])), + }; + + let column_values = vec![("user_id".to_string(), "123".to_string())]; + + // Test with salt + let first_name_with_salt = transform( + &mut rng, + first_name, + &Type::SingleValue { + sub_type: SubType::Character, + }, + &transformer, + TABLE_NAME, + &column_values, + Some("test_salt"), + ); + + // Test without salt + let first_name_without_salt = transform( + &mut rng, + first_name, + &Type::SingleValue { + sub_type: SubType::Character, + }, + &transformer, + TABLE_NAME, + &column_values, + None, + ); + + assert_ne!( + first_name_with_salt, first_name_without_salt, + "Same input with and without salt should produce different fake names" + ); + + // Test with different salt + let first_name_with_different_salt = transform( + &mut rng, + first_name, + &Type::SingleValue { + sub_type: SubType::Character, + }, + &transformer, + 
TABLE_NAME, + &column_values, + Some("different_salt"), + ); + + assert_ne!( + first_name_with_salt, first_name_with_different_salt, + "Same input with different salts should produce different fake names" + ); + } + #[test] fn fake_full_name_random() { let full_name = "John Smith"; @@ -833,6 +1022,7 @@ mod tests { &transformer, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert_ne!(new_full_name, full_name); @@ -862,6 +1052,7 @@ mod tests { &transformer, TABLE_NAME, &column_values, + None, ); let repeat_full_name_for_user1 = transform( @@ -873,6 +1064,7 @@ mod tests { &transformer, TABLE_NAME, &column_values, + None, ); assert_eq!( @@ -892,6 +1084,7 @@ mod tests { &transformer, TABLE_NAME, &column_values_user2, + None, ); assert_ne!( @@ -916,6 +1109,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert_ne!(new_last_name, last_name); @@ -945,6 +1139,7 @@ mod tests { &transformer, TABLE_NAME, &column_values, + None, ); let repeat_last_name_for_user1 = transform( @@ -956,6 +1151,7 @@ mod tests { &transformer, TABLE_NAME, &column_values, + None, ); assert_eq!( @@ -975,6 +1171,7 @@ mod tests { &transformer, TABLE_NAME, &column_values_user2, + None, ); assert_ne!( @@ -999,6 +1196,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_street_address != street_address); } @@ -1019,6 +1217,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_national_identity_number != national_identity_number); assert!(national_insurance_number::NATIONAL_INSURANCE_NUMBERS @@ -1041,6 +1240,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_phone_number != phone_number); assert!(new_phone_number.starts_with("+4477009")); @@ -1062,6 +1262,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_email != email); assert!(new_email.contains('@')); @@ -1083,6 +1284,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_phone_number != phone_number); assert!(new_phone_number.starts_with("+4477009")); @@ -1105,6 
+1307,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_phone_number != phone_number); assert!(new_phone_number.starts_with("+1")); @@ -1127,6 +1330,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert_eq!(new_postcode, "NW5"); } @@ -1147,6 +1351,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_user_name != user_name); } @@ -1168,6 +1373,7 @@ mod tests { transformer, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_user_name != user_name); @@ -1200,6 +1406,7 @@ mod tests { transformer, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert_eq!(new_url, fixed_url); } @@ -1220,6 +1427,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); } @@ -1241,6 +1449,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert_eq!(obfuscated_date, "2020-12-01"); } @@ -1266,6 +1475,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); } @@ -1287,6 +1497,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert_eq!(result, "0001-08-01 BC"); } @@ -1309,6 +1520,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_value != initial_value); assert_eq!(new_value.chars().count(), initial_value.chars().count()); @@ -1335,6 +1547,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); let re = Regex::new(r"^[a-z][a-z]\\.\\?").unwrap(); assert!( @@ -1361,6 +1574,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); let re = Regex::new(r"^[a-z]{2} [0-9]{2} [a-z][0-9][a-z][0-9]").unwrap(); assert!( @@ -1372,7 +1586,7 @@ mod tests { #[test] fn scramble_calculates_unicode_length_correctly() { - let initial_value = "한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한"; + let initial_value = "한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한한"; let mut rng = rng::get(); let new_value = 
transform( &mut rng, @@ -1386,6 +1600,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_value != initial_value); assert_eq!(new_value.chars().count(), initial_value.chars().count()); @@ -1408,6 +1623,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_value != initial_value); //TODO finish this test @@ -1430,6 +1646,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_value != initial_value); assert!(!new_value.contains("Second line")); @@ -1453,6 +1670,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_value != initial_value); let re = Regex::new(r"^[0-9]{9}$").unwrap(); @@ -1479,6 +1697,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_value != initial_value); let re = Regex::new(r#"^\{"[a-z]","[a-z]","[a-z] [a-z]{2} [a-z]"\}$"#).unwrap(); @@ -1505,6 +1724,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_value != initial_value); let re = Regex::new(r#"^\{"[a-z]{2} [a-z]{2} [a-z]","[a-z]"\}$"#).unwrap(); @@ -1532,6 +1752,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert_eq!(new_value, initial_value); } @@ -1552,6 +1773,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_value != initial_value); let re = Regex::new(r#"^\{[0-9],[0-9]{2},[0-9]{3},[0-9]{4}\}$"#).unwrap(); @@ -1579,6 +1801,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_value == "______ ____"); @@ -1601,6 +1824,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert!(new_value == r#"___\n___\n_____"#); @@ -1622,6 +1846,7 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert_eq!(new_json, "{\"{}\",\"{}\"}"); } @@ -1644,7 +1869,69 @@ mod tests { }, TABLE_NAME, EMPTY_COLUMNS, + None, ); assert_eq!(new_json, "{}"); } + + #[test] + fn fake_email_with_salt() { + let email = "john.doe@example.com"; + let mut rng = rng::get(); + + // Create transformer + let transformer = Transformer { + name: TransformerType::FakeEmail, + 
args: None, + }; + + // Test with salt + let new_email_with_salt = transform( + &mut rng, + email, + &Type::SingleValue { + sub_type: SubType::Character, + }, + &transformer, + TABLE_NAME, + EMPTY_COLUMNS, + Some("test_salt"), + ); + + // Test without salt + let new_email_without_salt = transform( + &mut rng, + email, + &Type::SingleValue { + sub_type: SubType::Character, + }, + &transformer, + TABLE_NAME, + EMPTY_COLUMNS, + None, + ); + + assert_ne!( + new_email_with_salt, new_email_without_salt, + "Same input with and without salt should produce different fake emails" + ); + + // Test with different salt + let new_email_with_different_salt = transform( + &mut rng, + email, + &Type::SingleValue { + sub_type: SubType::Character, + }, + &transformer, + TABLE_NAME, + EMPTY_COLUMNS, + Some("different_salt"), + ); + + assert_ne!( + new_email_with_salt, new_email_with_different_salt, + "Same input with different salts should produce different fake emails" + ); + } } diff --git a/src/test_builders.rs b/src/test_builders.rs index d73dc33..7a1cf10 100644 --- a/src/test_builders.rs +++ b/src/test_builders.rs @@ -59,6 +59,7 @@ pub mod builders { table_name: String, description: Option, columns: Vec, + salt: Option, } impl StrategyInFile { @@ -85,6 +86,7 @@ pub mod builders { description: self .description .unwrap_or_else(|| "Any description".to_string()), + salt: self.salt, columns: self.columns, } }