From 1c2a429204950491bfaba8896ba6e7a4a917cadd Mon Sep 17 00:00:00 2001 From: eternal-flame-AD Date: Wed, 11 Dec 2024 22:29:41 -0600 Subject: [PATCH] More flexible parsing options and a stricter default Signed-off-by: eternal-flame-AD --- README.md | 24 +++- package.json | 2 +- src/index.ts | 24 ++-- src/options/options.ts | 4 +- src/output/output.ts | 2 +- src/regex/regex.ts | 91 ++++++++++++-- test/__snapshots__/index.test.ts.snap | 163 +++++++++++++++++++++++++- test/index.test.ts | 62 ++++++++++ 8 files changed, 348 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 3b33c5e..4ad2318 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,8 @@ Validates email addresses based on regex, common typos, disposable email blacklists, DNS records and SMTP server response. -- Validates email looks like an email i.e. contains an "@" and a "." to the right of it. +- Identifies strings that look like an email using the `isEmail` function (i.e. contain an "@" and a "." to the right of it). +- Validates the email address using a regex, with options to change parsing strictness; see [parse options](#parse-options). - Validates common typos e.g. example@gmaill.com using [mailcheck](https://github.com/mailcheck/mailcheck). - Validates email was not generated by disposable email service using [disposable-email-domains](https://github.com/ivolo/disposable-email-domains). - Validates MX records are present on DNS. @@ -84,6 +85,27 @@ await validate({ ``` For a list of TLDs that are supported by default you can see [here](https://github.com/mailcheck/mailcheck/blob/afca031b4ce1cdc6e3ecbe88198f41b4835f81e3/src/mailcheck.js#L31). +> [!IMPORTANT] +> You must enable `validateRegex` for other validations to be reliable. + +## Parse Options + +The email address specification is quite complex and there are multiple conflicting standards. The default options are based on the WhatWG recommendation, which is the reference for form validation in most modern browsers. 
However, depending on your use case, you may want to override these options: + +```typescript +export type ParseEmailOptions = { + // Allow RFC 5322 angle address such as '"Name" ' + // use this option if you want to parse emails from headers or envelope addresses + allowAngle?: boolean, + // Allow RFC 5322 quoted email address such as '"this+is+my+personal+email+address@me.invalid"@gmail.com' + // use this option if you want to accept lesser known email address formats + allowQuoted?: boolean, + // Reject addresses containing "+", which is used for subaddressing + // use this option to enforce one email per user + rejectSubaddressing?: boolean, +}; +``` + ## [Default options can be found here](https://github.com/mfbx9da4/deep-email-validator/blob/8bbd9597a7ce435f0a77889a45daccdd5d7c3488/src/options/options.ts#L1) diff --git a/package.json b/package.json index f85d901..b8d1e66 100644 --- a/package.json +++ b/package.json @@ -61,4 +61,4 @@ "ts-node": "^10.4.0", "typescript": "^3.8.3" } -} +} \ No newline at end of file diff --git a/src/index.ts b/src/index.ts index e7b27ea..33fe8c5 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,4 +1,4 @@ -import { isEmail } from './regex/regex' +import { isEmail, parseEmail } from './regex/regex' import { checkTypo } from './typo/typo' import { getBestMx } from './dns/dns' import { checkSMTP } from './smtp/smtp' @@ -9,11 +9,21 @@ import './types' export async function validate(emailOrOptions: string | ValidatorOptions): Promise { const options = getOptions(emailOrOptions) - const email = options.email - - if (options.validateRegex) { - const regexResponse = isEmail(email) - if (regexResponse) return createOutput('regex', regexResponse) + const emailRaw = options.email + + const regexResponse = parseEmail(emailRaw, { + allowQuoted: options.allowQuoted, + allowAngle: options.allowAngle, + rejectSubaddressing: options.rejectSubaddressing, + }) + if (options.validateRegex && 'error' in regexResponse) return createOutput('regex', 
regexResponse.error) + // fallback to the naive domain extraction if the user specifically opted out of format validation + const domain = 'domain' in regexResponse ? regexResponse.domain : emailRaw.split('@')[1] + const email = 'effectiveAddr' in regexResponse ? regexResponse.effectiveAddr : emailRaw.trim() + + // prevent SMTP injection + if (email.indexOf('\r') !== -1 || email.indexOf('\n') !== -1) { + return createOutput('sanitization', 'Email cannot contain newlines') } if (options.validateTypo) { @@ -21,8 +31,6 @@ export async function validate(emailOrOptions: string | ValidatorOptions): Promi if (typoResponse) return createOutput('typo', typoResponse) } - const domain = email.split('@')[1] - if (options.validateDisposable) { const disposableResponse = await checkDisposable(domain) if (disposableResponse) return createOutput('disposable', disposableResponse) diff --git a/src/options/options.ts b/src/options/options.ts index 1fb46ce..2af8952 100644 --- a/src/options/options.ts +++ b/src/options/options.ts @@ -1,3 +1,5 @@ +import { ParseEmailOptions } from '../regex/regex' + const defaultOptions: ValidatorOptionsFinal = { email: 'name@example.org', sender: 'name@example.org', @@ -15,7 +17,7 @@ type Options = { validateTypo: boolean validateDisposable: boolean validateSMTP: boolean -} +} & ParseEmailOptions type MailCheckOptions = { additionalTopLevelDomains?: string[] diff --git a/src/output/output.ts b/src/output/output.ts index f1fd4cd..c20a091 100644 --- a/src/output/output.ts +++ b/src/output/output.ts @@ -1,6 +1,6 @@ import { ElementType } from '../types' -const OrderedLevels = ['regex', 'typo', 'disposable', 'mx', 'smtp'] as const +const OrderedLevels = ['sanitization', 'regex', 'typo', 'disposable', 'mx', 'smtp'] as const export type SubOutputFormat = { valid: boolean diff --git a/src/regex/regex.ts b/src/regex/regex.ts index 5128775..655c313 100644 --- a/src/regex/regex.ts +++ b/src/regex/regex.ts @@ -1,15 +1,84 @@ -export const isEmail = (email: 
string): string | undefined => { - email = (email || '').trim() - if (email.length === 0) { - return 'Email not provided' +// The RFC 5322 3.4.1 quoted flavor of email addresses which accepts more characters +const emailRegexAddrSpecRFC5322Quoted = + /^"([\x21\x23-\x5B\x5D-\x7E]+)"@([a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)$/ + +// The WhatWG standard for email addresses, this is usually what you want for web forms. +// https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address +const emailRegexAddrSpecWhatWG = + /^([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+)@([a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)$/ + +export type ParseEmailOptions = { + // Allow RFC 5322 angle address such as '"Name" ' + // use this option if you want to parse emails from headers or envelope addresses + allowAngle?: boolean + // Allow RFC 5322 quoted email address such as '"this+is+my+personal+email+address@me.invalid"@gmail.com' + // use this option if you want to accept lesser known email address formats + allowQuoted?: boolean + // Reject addresses containing "+", which is used for subaddressing + // use this option to enforce one email per user + rejectSubaddressing?: boolean +} + +export const parseEmail = ( + email: string, + options: ParseEmailOptions = {} +): { local: string; domain: string; effectiveAddr: string } | { error: string } => { + email = email.trim() + + if (email.endsWith('>')) { + if (!options.allowAngle) { + return { error: 'Angle address is not allowed' } + } + + const match = email.match(new RegExp('^[^<]*<([^>]+)>$')) + if (!match) { + return { error: 'Invalid angle address' } + } + + email = match[1] + } + + if (email.indexOf('@') === -1) { + return { error: 'Email does not contain "@".' } } - const split = email.split('@') - if (split.length < 2) { - return 'Email does not contain "@".' 
- } else { - const [domain] = split.slice(-1) - if (domain.indexOf('.') === -1) { - return 'Must contain a "." after the "@".' + + if (email.startsWith('"')) { + if (!options.allowQuoted) { + return { error: 'Quoted email addresses are not allowed' } + } + const match = email.match(emailRegexAddrSpecRFC5322Quoted) + if (!match) { + return { error: 'Invalid quoted email address' } } + const [, local, domain] = match + + if (options.rejectSubaddressing && local.includes('+')) { + return { error: 'Subaddressing is not allowed' } + } + + return { local, domain, effectiveAddr: `"${local}"@${domain}` } + } + + const match = email.match(emailRegexAddrSpecWhatWG) + if (!match) { + return { error: 'Invalid email address' } + } + + const [, local, domain] = match + + if (options.rejectSubaddressing && local.includes('+')) { + return { error: 'Subaddressing is not allowed' } + } + + return { local, domain, effectiveAddr: `${local}@${domain}` } +} + +// Left for backwards compatibility +export const isEmail = (email: string): string | undefined => { + const response = parseEmail(email, { allowQuoted: true, allowAngle: true }) + if ('error' in response) { + return response.error } + + return undefined } diff --git a/test/__snapshots__/index.test.ts.snap b/test/__snapshots__/index.test.ts.snap index 6926acb..9699e83 100644 --- a/test/__snapshots__/index.test.ts.snap +++ b/test/__snapshots__/index.test.ts.snap @@ -14,6 +14,9 @@ Object { "regex": Object { "valid": true, }, + "sanitization": Object { + "valid": true, + }, "smtp": Object { "reason": "SMTP Error: The mail address that you specified was not syntactically correct.", "valid": false, @@ -25,6 +28,62 @@ Object { } `; +exports[`validation tests fails when angle address is not allowed 1`] = ` +Object { + "reason": "regex", + "valid": false, + "validators": Object { + "disposable": Object { + "valid": false, + }, + "mx": Object { + "valid": false, + }, + "regex": Object { + "reason": "Angle address is not allowed", + 
"valid": false, + }, + "sanitization": Object { + "valid": true, + }, + "smtp": Object { + "valid": false, + }, + "typo": Object { + "valid": false, + }, + }, +} +`; + +exports[`validation tests fails when subaddressing is not allowed 1`] = ` +Object { + "reason": "regex", + "valid": false, + "validators": Object { + "disposable": Object { + "valid": false, + }, + "mx": Object { + "valid": false, + }, + "regex": Object { + "reason": "Subaddressing is not allowed", + "valid": false, + }, + "sanitization": Object { + "valid": true, + }, + "smtp": Object { + "valid": false, + }, + "typo": Object { + "valid": false, + }, + }, +} +`; + exports[`validation tests fails with bad dns 1`] = ` Object { "reason": "mx", @@ -40,6 +99,9 @@ Object { "regex": Object { "valid": true, }, + "sanitization": Object { + "valid": true, + }, "smtp": Object { "valid": false, }, @@ -90,6 +152,9 @@ Object { "reason": "Email does not contain \\"@\\".", "valid": false, }, + "sanitization": Object { + "valid": true, + }, "smtp": Object { "valid": false, }, @@ -114,6 +179,9 @@ Object { "regex": Object { "valid": true, }, + "sanitization": Object { + "valid": true, + }, "smtp": Object { "valid": false, }, @@ -140,6 +208,9 @@ Object { "regex": Object { "valid": true, }, + "sanitization": Object { + "valid": true, + }, "smtp": Object { "valid": false, }, @@ -164,8 +235,11 @@ Object { "regex": Object { "valid": true, }, + "sanitization": Object { + "valid": true, + }, "smtp": Object { - "reason": "Mail server closed connection without sending any data.", + "reason": "SMTP Error: The requested action was not done. 
Some error occurmiles in the mail server.", "valid": false, }, "typo": Object { @@ -175,6 +249,84 @@ Object { } `; +exports[`validation tests passes when angle address is allowed 1`] = ` +Object { + "valid": true, + "validators": Object { + "disposable": Object { + "valid": true, + }, + "mx": Object { + "valid": true, + }, + "regex": Object { + "valid": true, + }, + "sanitization": Object { + "valid": true, + }, + "smtp": Object { + "valid": true, + }, + "typo": Object { + "valid": true, + }, + }, +} +`; + +exports[`validation tests passes when quoted address 1`] = ` +Object { + "valid": true, + "validators": Object { + "disposable": Object { + "valid": true, + }, + "mx": Object { + "valid": true, + }, + "regex": Object { + "valid": true, + }, + "sanitization": Object { + "valid": true, + }, + "smtp": Object { + "valid": true, + }, + "typo": Object { + "valid": true, + }, + }, +} +`; + +exports[`validation tests passes when subaddressing is allowed 1`] = ` +Object { + "valid": true, + "validators": Object { + "disposable": Object { + "valid": true, + }, + "mx": Object { + "valid": true, + }, + "regex": Object { + "valid": true, + }, + "sanitization": Object { + "valid": true, + }, + "smtp": Object { + "valid": true, + }, + "typo": Object { + "valid": true, + }, + }, +} +`; + exports[`validation tests passes when valid special char 1`] = ` Object { "valid": true, @@ -188,6 +340,9 @@ Object { "regex": Object { "valid": true, }, + "sanitization": Object { + "valid": true, + }, "smtp": Object { "valid": true, }, @@ -234,6 +389,9 @@ Object { "regex": Object { "valid": true, }, + "sanitization": Object { + "valid": true, + }, "smtp": Object { "valid": true, }, @@ -257,6 +415,9 @@ Object { "regex": Object { "valid": true, }, + "sanitization": Object { + "valid": true, + }, "smtp": Object { "valid": true, }, diff --git a/test/index.test.ts b/test/index.test.ts index 9619a9e..40e6fb2 100644 --- a/test/index.test.ts +++ b/test/index.test.ts @@ -98,6 +98,68 @@ 
describe('validation tests', () => { elevenSeconds ) + it( + 'passes when quoted address', + async () => { + const res = await validate({ email: '"this+is+my+personal+email+address@me.invalid"@gmail.com', validateRegex: true, validateMx: true, validateSMTP: false, allowQuoted: true }) + expect(res.valid).toBe(true) + expect(every(values(res.validators), x => x && x.valid)).toBe(true) + expect(res).toMatchSnapshot() + } + ) + + it( + 'fails when quoted address is not allowed', + async () => { + const res = await validate({ email: '"this+is+my+personal+email+address@me.invalid"@gmail.com', validateRegex: true, validateMx: true, validateSMTP: false }) + expect(res.valid).toBe(false) + expect(res.validators.regex?.valid).toBe(false) + } + ) + + it( + 'passes when subaddressing is allowed', + async () => { + const res = await validate({ email: 'me+test@gmail.com', validateRegex: true, validateMx: true, validateSMTP: false }) + expect(res.valid).toBe(true) + expect(every(values(res.validators), x => x && x.valid)).toBe(true) + expect(res).toMatchSnapshot() + } + ) + + + it( + 'fails when subaddressing is not allowed', + async () => { + const res = await validate({ + email: 'me+test@gmail.com', validateRegex: true, validateMx: true, validateSMTP: false, rejectSubaddressing: true + }) + expect(res.valid).toBe(false) + expect(res.validators.regex?.valid).toBe(false) + expect(res).toMatchSnapshot() + } + ) + + it( + 'passes when angle address is allowed', + async () => { + const res = await validate({ email: 'Me ', validateRegex: true, validateMx: true, validateSMTP: false, allowAngle: true }) + expect(res.valid).toBe(true) + expect(every(values(res.validators), x => x && x.valid)).toBe(true) + expect(res).toMatchSnapshot() + } + ) + + it( + 'fails when angle address is not allowed', + async () => { + const res = await validate({ email: 'Me ', validateRegex: true, validateMx: true, validateSMTP: false }) + expect(res.valid).toBe(false) + 
expect(res.validators.regex?.valid).toBe(false) + expect(res).toMatchSnapshot() + } + ) + it( 'passes when valid wildcard', async () => {