diff --git a/src/AddressExtractor.cs b/src/AddressExtractor.cs
index bd2000c..0322e0d 100644
--- a/src/AddressExtractor.cs
+++ b/src/AddressExtractor.cs
@@ -93,15 +93,7 @@ public async IAsyncEnumerable ExtractAddressesAsync(IPerformanceStack st
                 if (valid is Result.DENY)
                     continue;
 
-                yield return address.Full
-                    // Simple cleanups that may be possible via the regex
-                    .Replace("'", string.Empty)
-                    .Replace("!", string.Empty)
-                    .Replace("`", string.Empty)
-                    .Replace("{", string.Empty)
-                    .Replace("#", string.Empty)
-                    .Replace(@"\n", string.Empty)
-                    .Replace("\\\"", string.Empty);
+                yield return address.Full;
             }
         }
     }
diff --git a/src/Objects/Filters/ReplaceInvalidFilter.cs b/src/Objects/Filters/ReplaceInvalidFilter.cs
new file mode 100644
index 0000000..396cf59
--- /dev/null
+++ b/src/Objects/Filters/ReplaceInvalidFilter.cs
@@ -0,0 +1,45 @@
+using System.Text.RegularExpressions;
+
+namespace HaveIBeenPwned.AddressExtractor.Objects.Filters {
+    /// <summary>
+    /// Strips a leading run of illegal characters from the full address so the remainder can be validated again.
+    /// </summary>
+    public partial class ReplaceInvalidFilter : AddressFilter.BaseFilter {
+        /// <summary>
+        /// Matches a leading run of <c>'</c>, <c>!</c>, <c>`</c>, <c>{</c>, <c>#</c>, backslashes and the
+        /// literal two-character sequence <c>\n</c>; group 1 captures whatever follows that run.
+        /// </summary>
+        /// <remarks>
+        /// The <c>\n</c> escape is an alternation branch rather than a character-class member: inside
+        /// <c>[...]</c> it would add a bare <c>n</c> to the set and eat the first letter of addresses
+        /// such as <c>nancy@example.com</c>.
+        /// </remarks>
+        [GeneratedRegex(@"^(?:\\n|['!`{#\\])+(.*)")]
+        public static partial Regex StartsWithCharacter();
+
+        /// <summary>Name of this filter.</summary>
+        public override string Name => "TrimIllegalStartChars";
+
+        /// <summary>
+        /// Trims illegal leading characters from <paramref name="address"/>.
+        /// </summary>
+        /// <param name="address">The address to inspect; its <c>Full</c> value is rewritten when a prefix is trimmed.</param>
+        /// <returns>
+        /// <see cref="Result.CONTINUE"/> when there is no illegal prefix, <see cref="Result.DENY"/> when
+        /// <c>address.Length</c> is 0 after trimming, otherwise <see cref="Result.REVALIDATE"/>.
+        /// </returns>
+        public override Result ValidateEmailAddress(ref EmailAddress address) {
+            Match match = ReplaceInvalidFilter.StartsWithCharacter()
+                .Match(address.Full);
+
+            // A failed match has Length 0, so this single check also covers "pattern did not match"
+            if ( match is not { Length: > 0 } )
+                return Result.CONTINUE;
+
+            address.Full = match.Groups[1].Value;
+
+            // If the email is now empty, it was only consisting of illegal characters
+            return address.Length is 0 ? Result.DENY : Result.REVALIDATE;
+        }
+    }
+}