I need to ‘sanitise’ large quantities of text by sometimes removing email addresses entirely and sometimes replacing the “@” with something that makes the address more difficult for online harvesters to identify. While working on these projects the comments of BBS members have been very helpful … thanks.
One comment … about how “heinously difficult” it was to check the validity of an email address (some awfully complex REGEXPs were being discussed at the time) … piqued my curiosity. If it works as intended, the attached script should check the input text for compliance with the email-address-format ‘standards’ (RFCs 2821 and 2822).
I have no doubt that the structure of the script, and probably the way it does lots of things, is awkward and unorthodox (that comes from having no idea what is actually orthodox) … but it seems to work. I’d appreciate advice on ways to tidy it up, and reports of any incorrect results it may produce. For example, I’ve built a repeat loop, right at the start, to allow the “Try another” button on the results dialog to restart the whole script. Surely there’s a tidier way of doing this … I tried a number of things, but this is the only way I achieved the desired result.
As for utility … for my purpose this beast is no more effective (in terms of false positives or false negatives) than a very simple algorithm that checks for six or more characters, at least one “at” (@) and at least one dot (.). Oh well, I think I’ve learnt a lot about AppleScript, and email address formatting, in the process :rolleyes:
As for the REGEXP … I am now not sure that it’s actually possible to build a single REGEXP that can do all of this, but I haven’t dissected the ex-parrot monster to see what it actually does (http://www.ex-parrot.com/~pdw/Mail-RFC822-Address.html).
Cheers
Dougal
-- Email address RFC compliance test
-- A script to try and check the compliance of an email address,
-- against RFCs 2821 & 2822, using vanilla AppleScript only ... no shell,
-- no PERL, and no regexps.
-- Script does not check whether the test address is real, or whether the
-- domain (or the top-level domain) of the test address is real ... it only
-- tests for compliance with the RFC provisions
-- Main loop: ask for an address, split it into local-part and domain, run the
-- format-specific checks, and report the outcome. Every failure path calls
-- concluding_response exactly once and sets ComplianceStatus to "failed", which
-- switches off all later checks for that pass. The loop repeats for as long as
-- the results dialog returns "Try another".
set ContinueOption to "Try another"
repeat while ContinueOption = "Try another"
    set ContinueOption to "unknown"
    define_variables_and_lists()
    -- reset the per-address state so nothing carries over from the previous pass
    set LocalPart to ""
    set Domain to ""
    set LocalPartFormat to "unknown"
    set DomainFormat to "unknown"
    set LocalPartComplianceStatus to "unknown"
    set DomainComplianceStatus to "unknown"
    set DotAtomComplianceStatus to "unknown"
    address_to_test(Test_Addresses)
    -- remember the delimiters in force so they can be put back after each split
    set SavedDelimiters to AppleScript's text item delimiters
    -- Is the Addr-spec too long to comply?
    set AddrSpecLength to length of PossibleAddress
    if AddrSpecLength > 320 then
        set ComplianceStatus to "failed"
        set FailReason to "the test address is " & AddrSpecLength & " characters in length. A compliant email address cannot exceed 320 characters: 64 for the local part; 255 for the domain; and one for the @. (Ref: RFC2821, 4.5.3.1)"
        concluding_response(PossibleAddress, ComplianceStatus, FailReason)
    else
        -- split the address at the @ and see what comes out
        set AppleScript's text item delimiters to "@"
        set AddressMainParts to text items of PossibleAddress
        set AppleScript's text item delimiters to SavedDelimiters
        set AtPartCount to count of AddressMainParts
        if AtPartCount < 2 then
            -- first option: there's no @-split
            set ComplianceStatus to "failed"
            set FailReason to "the test address does not contain an \"at\" (@) character. All compliant email addresses have the form local-part@domain ... and so always contain at least one @ character. (Ref: RFC2822, 3.4.1; RFC2821, 4.1.2.)"
            concluding_response(PossibleAddress, ComplianceStatus, FailReason)
        else if AtPartCount = 2 then
            -- second option: the @-split results in two parts
            set LocalPart to item 1 of AddressMainParts
            set Domain to item 2 of AddressMainParts
            -- What format is the local-part?
            if LocalPart does not contain "\"" then
                set LocalPartFormat to "dot-atom"
            else if (length of LocalPart) > 1 and LocalPart starts with "\"" and LocalPart ends with "\"" then
                -- the length test stops a lone double-quote counting as both the opening and the closing quote
                set LocalPartFormat to "quoted-string"
            else
                set ComplianceStatus to "failed"
                set FailReason to "the local-part of the address contains a double-quote character (\") but is not as part of a quoted-string. (Ref: RFC2822, 3.4.1 & 3.2.5; RFC2821, 4.1.2.)"
                concluding_response(PossibleAddress, ComplianceStatus, FailReason)
            end if
            -- What format is the domain?
            if Domain starts with "[" and Domain ends with "]" then
                set DomainFormat to "domain-literal"
            else
                set DomainFormat to "dot-atom" -- it may not actually be but that is tested later
            end if
        else
            -- third option: the address contains more than one @. Extra @ characters are
            -- only legal inside a quoted-string local-part or a domain-literal domain, so
            -- the split point has to be found without throwing any of the text away.
            set ClosingQuoteIndex to 0
            if PossibleAddress starts with "\"" then set ClosingQuoteIndex to quoted_local_part_end(PossibleAddress)
            if ClosingQuoteIndex > 0 then
                -- the local-part runs from the opening quote to the unescaped quote that precedes an @
                set LocalPartFormat to "quoted-string"
                set LocalPart to text 1 thru ClosingQuoteIndex of PossibleAddress
                if (ClosingQuoteIndex + 1) < AddrSpecLength then
                    set Domain to text (ClosingQuoteIndex + 2) thru -1 of PossibleAddress
                else
                    set Domain to ""
                end if
            else
                -- no quoted-string: the local-part ends at the first @ and the domain is
                -- everything after it (including any further @ characters)
                set LocalPartFormat to "dot-atom"
                set LocalPart to item 1 of AddressMainParts
                set Domain to text ((length of LocalPart) + 2) thru -1 of PossibleAddress
            end if
            if Domain starts with "[" and Domain ends with "]" then
                set DomainFormat to "domain-literal"
            else
                set DomainFormat to "dot-atom"
            end if
            -- the address can't have more than one @ and be in dot-atom@dot-atom format
            if LocalPartFormat = "dot-atom" and DomainFormat = "dot-atom" then
                set ComplianceStatus to "failed"
                set FailReason to "the test address contains more than one \"at\" (@). Additional @ characters can only exist in a compliant address if they're contained within a quoted-string format local-part or a domain-literal format domain. The local-part of the test addres is not in quoted-string format and the domain is not in domain-literal format. (Ref: RFC2822, 3.4.1 & 3.2.5; RFC2821, 4.1.2.)."
                concluding_response(PossibleAddress, ComplianceStatus, FailReason)
            end if
        end if
    end if
    -- by now the test address has either been rejected or the local-part and domain
    -- have been identified, extracted, and tentatively categorised
    -- (but domain has not yet been checked as IPv4 address-literal)
    -- enforce the individual length limits that the 320-character total is derived from
    if ComplianceStatus is not "failed" then
        if (length of LocalPart) > 64 then
            set ComplianceStatus to "failed"
            set FailReason to "the local-part of the test address is " & (length of LocalPart) & " characters in length. The local-part of a compliant email address cannot exceed 64 characters. (Ref: RFC2821, 4.5.3.1)"
            concluding_response(PossibleAddress, ComplianceStatus, FailReason)
        else if (length of Domain) > 255 then
            set ComplianceStatus to "failed"
            set FailReason to "the domain of the test address is " & (length of Domain) & " characters in length. The domain of a compliant email address cannot exceed 255 characters. (Ref: RFC2821, 4.5.3.1)"
            concluding_response(PossibleAddress, ComplianceStatus, FailReason)
        end if
    end if
    -- if local-part appears to be in the dot-atom format then check that it's compliant
    if ComplianceStatus is not "failed" and (LocalPartFormat = "dot-atom") then
        set TestTextSet to RFC2822_atext & {"."}
        set TestPart to "local-part"
        dot_atomiser(PossibleAddress, LocalPart, TestTextSet, TestPart)
        if DotAtomComplianceStatus = "passed" then
            set LocalPartComplianceStatus to "passed"
        end if
    end if
    -- if local-part appears to be in the quoted-string format then check that it's compliant
    if ComplianceStatus is not "failed" and (LocalPartFormat = "quoted-string") then
        check_quoted_string(LocalPart, RFC2822_text, RFC2822_qtext, PossibleAddress)
    end if
    -- if appropriate check that the dot-atom format of the domain is compliant
    if ComplianceStatus is not "failed" and (DomainFormat = "dot-atom") then
        set TestTextSet to RFC2822_atext & {"."}
        set DotAtomComplianceStatus to "unknown"
        set TestPart to "domain"
        dot_atomiser(PossibleAddress, Domain, TestTextSet, TestPart)
        if DotAtomComplianceStatus = "passed" then
            -- need to also ensure that domain's final dot has two or more characters after it
            set AppleScript's text item delimiters to "."
            set TopLevelDomain to last item of (text items of Domain)
            set AppleScript's text item delimiters to SavedDelimiters
            if length of TopLevelDomain < 2 then
                set FailReason to "the domain of the test address appears to be in the dot-atom format but there is only one character after the last dot (.). All Top Level Domains are two or more characters in length. (Ref: RFC1591, 3.2.4 & 3.4.1)"
                set DotAtomComplianceStatus to "failed"
                set DomainComplianceStatus to "failed"
                set ComplianceStatus to "failed"
                concluding_response(PossibleAddress, ComplianceStatus, FailReason)
            else
                set DomainComplianceStatus to "passed"
                -- could check the validity of the TLD here ... if you really wanted to
                -- it may be marginally easier to use a whois check rather that try
                -- and integrate the provisions of ISO3166 etc
            end if
        end if
    end if
    -- if the domain is in the domain-literal format check that it's compliant ... then,
    -- if it's domain-literal compliant, check to see whether it's in address-literal format
    -- ... then, if it's address-literal, check that that is also compliant
    if ComplianceStatus is not "failed" and (DomainFormat = "domain-literal") then
        check_domain_literal(Domain, RFC2822_text, RFC2822_dtext, PossibleAddress)
        -- check for address-literal
        if DomainComplianceStatus is "passed" then
            check_address_literal(DomainCharacters, ASCII_DIGIT, PossibleAddress, OldDomain)
        end if
    end if
    -- mop-up and just in case
    if (ComplianceStatus is not equal to "failed") then
        if LocalPartComplianceStatus = "passed" and DomainComplianceStatus = "passed" then
            set ComplianceStatus to "passed"
            set FailReason to ""
            concluding_response(PossibleAddress, ComplianceStatus, FailReason)
        else
            set ComplianceStatus to "neither passed nor failed"
            set FailReason to "the test address hasn't yet been subjected to a complete battery of evaluation tests ... or, alternatively, I've missed a possibility in writing this script."
            concluding_response(PossibleAddress, ComplianceStatus, FailReason)
        end if
    end if
end repeat

-- Private helper for the main loop.
-- Returns the index of the double-quote that closes a quoted-string local-part,
-- i.e. the first double-quote after character 1 that is not backslash-escaped and
-- is immediately followed by an @. Returns 0 when there is no such character.
on quoted_local_part_end(TheAddress)
    set Escaped to false
    repeat with i from 2 to ((length of TheAddress) - 1)
        set ThisCharacter to character i of TheAddress
        if Escaped then
            -- this character is the second half of a quoted-pair, whatever it is
            set Escaped to false
        else if ThisCharacter = "\\" then
            set Escaped to true
        else if ThisCharacter = "\"" and character (i + 1) of TheAddress = "@" then
            return i
        end if
    end repeat
    return 0
end quoted_local_part_end
-----------------------------------------------------------------------------------------------------
--- Subroutines -----------------------------------------------------------------------------------
-- Prompts for the address to evaluate and stores the reply in the global PossibleAddress.
-- Test_Addresses: list of sample addresses; one is picked at random and offered as the
-- dialog's default answer. An empty list simply gives an empty default.
on address_to_test(Test_Addresses)
    global PossibleAddress
    if (count of Test_Addresses) is 0 then
        set SuggestedAddress to ""
    else
        -- a single random draw is enough; the original drew twice and discarded the first
        set SuggestedAddress to (some item of Test_Addresses) as text
    end if
    set PossibleAddress to the text returned of (display dialog "Enter the address you want tested," & return & "or use the test address supplied: " default answer SuggestedAddress)
end address_to_test
-----------------------------------------------------------------------------------------------------
-- Shows the outcome of one evaluation and records which button was pressed in the
-- global ContinueOption.
-- PossibleAddress: the address that was tested.
-- ComplianceStatus: "failed", "passed", or anything else (reported as "neither passed nor failed").
-- FailReason: explanation shown for a failure; replaced by a stock sentence for the
-- "neither" case and not shown for a pass.
on concluding_response(PossibleAddress, ComplianceStatus, FailReason)
    global ContinueOption
    set ContinueOption to "unknown"
    set ButtonList to {"Cancel", "Try another"}
    -- any status other than the two definite ones is normalised before the headline is built
    if ComplianceStatus is not "failed" and ComplianceStatus is not "passed" then
        set ComplianceStatus to "neither passed nor failed"
        set FailReason to "the test address hasn't yet been subjected to a complete battery of evaluation tests."
    end if
    set Headline to "The test address (" & PossibleAddress & ") has " & ComplianceStatus & " this evaluation." & return & return
    if ComplianceStatus = "failed" then
        set Caveat to return & "This was the first non-compliance identified. It is possible that " & PossibleAddress & " has additional non-compliances that were not checked during this evaluation."
        set Reply to display dialog (Headline & "This was because " & FailReason & Caveat) buttons ButtonList default button "Try another" with icon stop
    else if ComplianceStatus = "passed" then
        set Caveat to return & "Although it has passed this compliance evaluation this does not mean that " & PossibleAddress & " is an actual email address. Other methods should be utilised to determine that."
        set Reply to display dialog (Headline & Caveat) buttons ButtonList default button "Try another" with icon note
    else
        set Reply to display dialog (Headline & "This was because " & FailReason) buttons ButtonList default button "Try another" with icon caution
    end if
    set ContinueOption to the button returned of Reply
end concluding_response
-----------------------------------------------------------------------------------------------------
-- Checks that TestString is a well-formed dot-atom: not empty, no leading, trailing
-- or doubled dots, and every character a member of TestTextSet.
-- PossibleAddress: the whole address, used only for the results dialog.
-- TestString: the local-part or domain being checked.
-- TestTextSet: list of the characters that are allowed.
-- TestPart: "local-part" or "domain", used in the failure wording.
-- Result is left in the global DotAtomComplianceStatus ("passed" or "failed"). On
-- failure the globals ComplianceStatus and FailReason are also set and
-- concluding_response is called.
on dot_atomiser(PossibleAddress, TestString, TestTextSet, TestPart)
    global DotAtomComplianceStatus
    global ComplianceStatus
    global FailReason
    set DotAtomComplianceStatus to "unknown"
    set Problem to ""
    if (length of TestString) is 0 then
        -- must be tested first: asking for the first character of an empty string is an error
        set Problem to "the " & TestPart & " of the test address is empty. The RFC2822 dot-atom format requires at least one 'atext' character. (Ref: RFC2822, 3.2.4 & 3.4.1)"
    else if TestString contains ".." then
        -- check for two or more dots in a row
        set Problem to "the " & TestPart & " of the test address appears to be in the dot-atom format but contains two or more dots (.) in a row. The RFC2822 dot-atom format requires only 'atext' characters between each dot. (Ref: RFC2822, 3.2.4 & 3.4.1)"
    else if TestString starts with "." or TestString ends with "." then
        set Problem to "the " & TestPart & " of the test address appears to be in the dot-atom format but contains a dot (.) at its start and / or finish. The RFC2822 dot-atom format requires 'atext' characters at the start and end of a dot-atom sequence. (Ref: RFC2822, 3.2.4 & 3.4.1)"
    else
        -- check that it's only atext; stop at the first offender
        repeat with n from 1 to (length of TestString)
            set TestCharacter to character n of TestString
            if TestTextSet does not contain TestCharacter then
                set Problem to "the " & TestPart & " of the test address appears to be in the dot-atom format but contains characters that do not belong in the RFC2822 'atext' subset of ASCII (Ref: RFC2822, 3.4.1 & 3.2.5)." & return & "The offending item is character number " & n & " (" & TestCharacter & ") of the " & TestPart & " (" & TestString & ")."
                exit repeat
            end if
        end repeat
    end if
    if Problem is "" then
        set DotAtomComplianceStatus to "passed"
    else
        set DotAtomComplianceStatus to "failed"
        set ComplianceStatus to "failed"
        set FailReason to Problem
        concluding_response(PossibleAddress, ComplianceStatus, FailReason)
    end if
end dot_atomiser
------------------------------------------------------------------------------------------------------
-- Checks a local-part that is written as a quoted-string.
-- LocalPart: the local-part including its surrounding double-quotes.
-- RFC2822_text: list of characters allowed as the second half of a quoted-pair.
-- RFC2822_qtext: list of characters allowed unescaped between the quotes.
-- PossibleAddress: the whole address, used only for the results dialog.
-- The content is read left to right with an "escaped" flag, so a backslash always
-- pairs with the character that follows it (two backslashes are one escaped
-- backslash, not an escape for whatever comes third).
-- Result is left in the global LocalPartComplianceStatus. On failure the globals
-- QuotedStringComplianceStatus, ComplianceStatus and FailReason are set as well and
-- concluding_response is called. Nothing between the quotes is accepted, because the
-- quoted-string grammar allows zero or more content items.
on check_quoted_string(LocalPart, RFC2822_text, RFC2822_qtext, PossibleAddress)
    global OldLocalPart
    global QuotedStringComplianceStatus
    global LocalPartComplianceStatus
    global ComplianceStatus
    global TestCharacter
    global x
    global FailReason
    set QuotedStringComplianceStatus to "unknown"
    set LocalPartComplianceStatus to "unknown"
    set OldLocalPart to LocalPart
    set Problem to ""
    if (length of LocalPart) < 2 or LocalPart does not start with "\"" or LocalPart does not end with "\"" then
        set Problem to "the local-part of the test address (" & OldLocalPart & ") is not enclosed in a pair of double-quote characters and so is not a quoted-string. (Ref: RFC2822, 3.4.1 & 3.2.5)"
    else
        -- remove the DQUOTEs at each end of the quoted-string
        if (length of LocalPart) > 2 then
            set LocalPartCharacters to characters 2 thru -2 of LocalPart
        else
            set LocalPartCharacters to {}
        end if
        set Escaped to false
        repeat with n from 1 to count of LocalPartCharacters
            set TestCharacter to item n of LocalPartCharacters
            -- position within the original local-part, allowing for the opening quote
            set x to (n + 1)
            if Escaped then
                -- second half of a quoted-pair: anything in 'text' is allowed
                if RFC2822_text does not contain TestCharacter then
                    set Problem to "the local-part of the test address appears to be in the quoted-string format but contains characters that do not belong in the RFC2822 'text' subset of ASCII as part of a quoted-pair (Ref: RFC2822, 3.4.1 & 3.2.5)." & return & "The offending item is character number " & x & " (" & TestCharacter & ") of the local-part (" & OldLocalPart & ")."
                    exit repeat
                end if
                set Escaped to false
            else if TestCharacter = "\\" then
                set Escaped to true
            else if RFC2822_qtext does not contain TestCharacter then
                set Problem to "the local-part of the test address appears to be in the quoted-string format but contains characters that do not belong in the RFC2822 'qtext' subset of ASCII and are not part of a quoted-pair (Ref: RFC2822, 3.4.1 & 3.2.5)." & return & "The offending item is character number " & x & " (" & TestCharacter & ") of the local-part (" & OldLocalPart & ")."
                exit repeat
            end if
        end repeat
        if Problem is "" and Escaped then
            -- the final backslash escapes the closing quote, so the string is never closed
            set Problem to "the local-part of the test address appears to be in the quoted-string format but ends with a backslash that is not followed by a character to form a quoted-pair (Ref: RFC2822, 3.4.1 & 3.2.5)." & return & "The local-part is (" & OldLocalPart & ")."
        end if
    end if
    if Problem is "" then
        set LocalPartComplianceStatus to "passed"
    else
        set QuotedStringComplianceStatus to "failed"
        set LocalPartComplianceStatus to "failed"
        set ComplianceStatus to "failed"
        set FailReason to Problem
        concluding_response(PossibleAddress, ComplianceStatus, FailReason)
    end if
end check_quoted_string
-----------------------------------------------------------------------------------------------------
-- Checks a domain that is written as a domain-literal, i.e. enclosed in [ and ].
-- Domain: the domain including its square brackets.
-- RFC2822_text: list of characters allowed as the second half of a quoted-pair.
-- RFC2822_dtext: list of characters allowed unescaped between the brackets.
-- PossibleAddress: the whole address, used only for the results dialog.
-- The content is read left to right with an "escaped" flag, so a backslash always
-- pairs with the character that follows it.
-- Result is left in the global DomainComplianceStatus, and the characters between the
-- brackets are left in the global DomainCharacters for the caller. On failure the
-- globals DomainLiteralComplianceStatus, ComplianceStatus and FailReason are set as
-- well and concluding_response is called.
on check_domain_literal(Domain, RFC2822_text, RFC2822_dtext, PossibleAddress)
    global OldDomain
    global DomainLiteralComplianceStatus
    global DomainComplianceStatus
    global ComplianceStatus
    global TestCharacter
    global x
    global FailReason
    global DomainCharacters
    set DomainLiteralComplianceStatus to "unknown"
    set DomainComplianceStatus to "unknown"
    set OldDomain to Domain
    set DomainCharacters to {}
    set Problem to ""
    if (length of Domain) < 2 or Domain does not start with "[" or Domain does not end with "]" then
        set Problem to "the domain of the test address (" & OldDomain & ") is not enclosed in a pair of square brackets and so is not a domain-literal. (Ref: RFC2822, 3.4.1)"
    else if (length of Domain) is 2 then
        -- nothing between the brackets: reported explicitly instead of slicing an empty range
        set Problem to "the domain of the test address (" & OldDomain & ") appears to be in the domain-literal format but there is nothing between the square brackets. (Ref: RFC2821, 4.1.3)"
    else
        -- remove the square brackets at each end
        set DomainCharacters to characters 2 thru -2 of Domain
        set Escaped to false
        repeat with n from 1 to count of DomainCharacters
            set TestCharacter to item n of DomainCharacters
            -- position within the original domain, allowing for the opening bracket
            set x to (n + 1)
            if Escaped then
                -- second half of a quoted-pair: anything in 'text' is allowed
                if RFC2822_text does not contain TestCharacter then
                    set Problem to "the domain of the test address appears to be in the domain-literal format but contains characters that do not belong in the RFC2822 'text' subset of ASCII as part of a quoted-pair (Ref: RFC2822, 3.4.1 & 3.2.5)." & return & "The offending item is character number " & x & " (" & TestCharacter & ") of the domain (" & OldDomain & ")."
                    exit repeat
                end if
                set Escaped to false
            else if TestCharacter = "\\" then
                set Escaped to true
            else if RFC2822_dtext does not contain TestCharacter then
                set Problem to "the domain of the test address appears to be in the domain-literal format but contains characters that do not belong in the RFC2822 'dtext' subset of ASCII and are not part of a quoted-pair (Ref: RFC2822, 3.4.1 & 3.2.5)." & return & "The offending item is character number " & x & " (" & TestCharacter & ") of the domain (" & OldDomain & ")."
                exit repeat
            end if
        end repeat
        if Problem is "" and Escaped then
            -- the final backslash escapes the closing bracket, so the literal is never closed
            set Problem to "the domain of the test address appears to be in the domain-literal format but ends with a backslash that is not followed by a character to form a quoted-pair (Ref: RFC2822, 3.4.1 & 3.2.5)." & return & "The domain is (" & OldDomain & ")."
        end if
    end if
    if Problem is "" then
        set DomainComplianceStatus to "passed"
    else
        set DomainLiteralComplianceStatus to "failed"
        set DomainComplianceStatus to "failed"
        set ComplianceStatus to "failed"
        set FailReason to Problem
        concluding_response(PossibleAddress, ComplianceStatus, FailReason)
    end if
end check_domain_literal
-----------------------------------------------------------------------------------------------------
-- Decides whether the content of a domain-literal is an IPv4 address-literal and, if
-- so, checks that it is four dot-separated numbers of one to three digits, each no
-- greater than 255.
-- DomainCharacters: list of the characters between the square brackets.
-- ASCII_DIGIT: list of the digit characters.
-- PossibleAddress: the whole address, used only for the results dialog.
-- OldDomain: the domain including its brackets, used in the failure wording.
-- The content counts as an address-literal only when EVERY character is a digit or a
-- dot; otherwise the global DomainFormat is set to "domain-literal" and nothing else
-- is touched. For an address-literal, DomainFormat becomes "address-literal" and the
-- global DomainComplianceStatus becomes "passed" or "failed"; on failure the globals
-- ComplianceStatus and FailReason are set and concluding_response is called.
on check_address_literal(DomainCharacters, ASCII_DIGIT, PossibleAddress, OldDomain)
    global DomainFormat
    global ComplianceStatus
    global DomainComplianceStatus
    global FailReason
    if ComplianceStatus is "failed" then return
    if (count of DomainCharacters) is 0 then return
    -- look at the whole content before deciding what format it is
    set AllowedCharacters to ASCII_DIGIT & {"."}
    repeat with ThisCharacter in DomainCharacters
        if AllowedCharacters does not contain (contents of ThisCharacter) then
            set DomainFormat to "domain-literal"
            return
        end if
    end repeat
    set DomainFormat to "address-literal"
    -- reform the address-literal and extract the octets, leaving the delimiters as found
    set SavedDelimiters to AppleScript's text item delimiters
    set AppleScript's text item delimiters to ""
    set DomainText to DomainCharacters as text
    set AppleScript's text item delimiters to "."
    set DomainOctets to text items of DomainText
    set AppleScript's text item delimiters to SavedDelimiters
    set WrongShape to "the domain of the test address (" & OldDomain & ") appears to be in the address-literal format but does not contain only four 'octets' of integers separated by dots (.) (Ref: RFC2821, 4.1.3)."
    set Problem to ""
    if (count of DomainOctets) is not 4 then
        set Problem to WrongShape
    else
        repeat with OctetNumber from 1 to 4
            set ThisOctet to item OctetNumber of DomainOctets
            if (length of ThisOctet) is 0 then
                -- two dots in a row, or a dot at either end, leaves an empty octet
                set Problem to WrongShape
                exit repeat
            else if (length of ThisOctet) > 3 then
                -- tested on length so that an over-long run of digits is never coerced to an integer
                set Problem to "the domain of the test address (" & OldDomain & ") appears to be in the address-literal format but contains an 'octet' of more than three digits (Ref: RFC2821, 4.1.3). The offending item is 'octet' number " & OctetNumber & " of the domain: (" & ThisOctet & ")."
                exit repeat
            else if (ThisOctet as integer) > 255 then
                set Problem to "the domain of the test address (" & OldDomain & ") appears to be in the address-literal format but contains 'octets' of integers that are greater than 255 (Ref: RFC2821, 4.1.3). The offending item is 'octet' number " & OctetNumber & " of the domain: (" & ThisOctet & ")."
                exit repeat
            end if
        end repeat
    end if
    if Problem is "" then
        set DomainComplianceStatus to "passed"
    else
        set DomainComplianceStatus to "failed"
        set ComplianceStatus to "failed"
        set FailReason to Problem
        concluding_response(PossibleAddress, ComplianceStatus, FailReason)
    end if
end check_address_literal
-----------------------------------------------------------------------------------------------------
-- Resets the global ComplianceStatus and FailReason to "unknown" and (re)builds the
-- global lists the rest of the script reads: Test_Addresses, ASCII_DIGIT and the
-- RFC2822 character sets (text, atext, ctext, dtext, qtext). Each character set is a
-- list of one-character strings. The intermediate lists are local to this handler.
on define_variables_and_lists()
    global ComplianceStatus
    set ComplianceStatus to "unknown"
    global FailReason
    set FailReason to "unknown"
    global Test_Addresses
    set Test_Addresses to {"Foo.Bar@[1.2.3.4]", "8pbn20$oqv$1@example.com", "Foo.Bar@[111.222.333.444]", "Foo.Bar@[12345.67890]", "Foo.Bar@example.com", "\"Foo\"Bar\"@example.com", "\"Foo\\\"Bar\"@example.com", "Foo.Bar@exa[mple.com", "Foo.Bar@[exa[mple.com]", "Foo.Bar@[ex\\[mple.com]", "Foo.Bar@example.c", "#!/bin/sh++@[1.2.3.4]", "Foo..Bar@example.com", "Foo.Bar@.example.com", "\"Foo.Bar\"@example.com", "\"Foo@Bar@[example.com", "Foo.Bar@[ex@mple.com]", "\"Foo.B@r\"@[ex@mple.com]", "\"Foo.B@r\"@example.com", "Foo.Bar@example.com", "\"F@t.Foo.Bar\"@[ex@mple.com]", "Foo.Bar[at]example.com", "Foo.Bar@example-isp-name.com", "Foo.Bar@example_isp_name.com", "Foo.Bar@example.com", "foo+bar@example.com", "\\<@example.com", "\"foo\\,bar\"@example.com", "\"foo+bar@example.com"}
    local ASCII_ALPHA
    set ASCII_ALPHA to {"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"} & {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"}
    -- ALPHA = %d65-90 / ;
    -- %d97-122 / ;
    global ASCII_DIGIT
    set ASCII_DIGIT to {"1", "2", "3", "4", "5", "6", "7", "8", "9", "0"}
    -- DIGIT = %d48-57 / ;
    local ASCII_controls_without_whitespace
    set ASCII_controls_without_whitespace to {ASCII character 1, ASCII character 2, ASCII character 3, ASCII character 4, ASCII character 5, ASCII character 6, ASCII character 7, ASCII character 8, ASCII character 11, ASCII character 12, ASCII character 14, ASCII character 15, ASCII character 16, ASCII character 17, ASCII character 18, ASCII character 19, ASCII character 20, ASCII character 21, ASCII character 22, ASCII character 23, ASCII character 24, ASCII character 25, ASCII character 26, ASCII character 27, ASCII character 28, ASCII character 29, ASCII character 30, ASCII character 31, ASCII character 127}
    local ASCII_controls_without_CR_LF
    set ASCII_controls_without_CR_LF to ASCII_controls_without_whitespace & {ASCII character 9}
    local ASCII_control_characters
    set ASCII_control_characters to ASCII_controls_without_whitespace & {ASCII character 9, ASCII character 10, ASCII character 13}
    -- control characters = %d1-31 / ;
    -- The printable non-alphanumerics that are neither specials nor space. This list is
    -- what atext is built from, so DEL (127) must not be in it; DEL is added to the
    -- "rest of noncontrol" list separately below.
    local ASCII_rest_of_noncontrol_characters_except -- specials and space
    set ASCII_rest_of_noncontrol_characters_except to {"!", "#", "$", "%", "&", "'", "*", "+", "-", "/", "=", "?", "^", "_", "`", "{", "|", "}", "~"}
    local RFC2822_Specials
    set RFC2822_Specials to {"(", ")", "<", ">", "[", "]", ":", ";", "@", "\\", ",", ".", "\""}
    local ASCII_rest_of_noncontrol_characters
    set ASCII_rest_of_noncontrol_characters to ASCII_rest_of_noncontrol_characters_except & RFC2822_Specials & {space, ASCII character 127}
    -- rest of noncontrol characters = %d32-47 / ; Non ALPHA, non DIGIT,
    -- %d58-64 / ; and non Control
    -- %d91-96 / ; Character
    -- %d123-127 / ;
    local RFC2822_NO_WS_CTL
    set RFC2822_NO_WS_CTL to ASCII_controls_without_whitespace
    -- NO-WS-CTL = %d1-8 / ; US-ASCII control characters
    -- %d11 / ; that do not include the
    -- %d12 / ; carriage return (13), line feed (10),
    -- %d14-31 / ; and white space characters (9)
    -- %d127
    global RFC2822_text
    set RFC2822_text to ASCII_ALPHA & ASCII_DIGIT & ASCII_rest_of_noncontrol_characters & ASCII_controls_without_CR_LF
    -- text = %d1-9 / ; Characters excluding CR and LF
    -- %d11 /
    -- %d12 /
    -- %d14-127 /
    global RFC2822_atext
    set RFC2822_atext to ASCII_ALPHA & ASCII_DIGIT & ASCII_rest_of_noncontrol_characters_except
    -- atext = ALPHA / DIGIT / ; Any character except controls,
    -- "!" / "#" / ; SP, and specials.
    -- "$" / "%" / ; Used for atoms
    -- "&" / "'" /
    -- "*" / "+" /
    -- "-" / "/" /
    -- "=" / "?" /
    -- "^" / "_" /
    -- "`" / "{" /
    -- "|" / "}" /
    -- "~"
    global RFC2822_ctext
    set RFC2822_ctext to RFC2822_NO_WS_CTL & RFC2822_atext & {"<", ">", "[", "]", ":", ";", "@", ",", ".", "\""}
    -- ctext = NO-WS-CTL / ; Non white space controls
    -- %d33-39 / ; The rest of the US-ASCII
    -- %d42-91 / ; characters not including "(",
    -- %d93-126 ; ")", or "\"
    global RFC2822_dtext
    set RFC2822_dtext to RFC2822_NO_WS_CTL & RFC2822_atext & {"(", ")", "<", ">", ":", ";", "@", ",", ".", "\""}
    -- dtext = NO-WS-CTL / ; Non white space controls
    -- %d33-90 / ; The rest of the US-ASCII
    -- %d94-126 ; characters not including "[",
    -- ; "]", or "\"
    global RFC2822_qtext
    set RFC2822_qtext to RFC2822_NO_WS_CTL & RFC2822_atext & {"(", ")", "<", ">", "[", "]", ":", ";", "@", ",", "."}
    -- qtext = NO-WS-CTL / ; Non white space controls
    -- %d33 / ; The rest of the US-ASCII
    -- %d35-91 / ; characters not including "\"
    -- %d93-126 ; or the quote character
end define_variables_and_lists