support unicode in EmailField

This commit is contained in:
Stefan Wojcik 2017-04-09 22:33:11 -04:00
parent b52d3e3a7b
commit 601b79865d
2 changed files with 135 additions and 24 deletions

View File

@ -4,6 +4,7 @@ import itertools
import re import re
import time import time
import uuid import uuid
import socket
import warnings import warnings
from collections import Mapping from collections import Mapping
from operator import itemgetter from operator import itemgetter
@ -154,21 +155,103 @@ class EmailField(StringField):
.. versionadded:: 0.4 .. versionadded:: 0.4
""" """
# Pattern for the user part of an email address (the text before the last
# "@"). Anchored with ^ and \Z so the whole user part has to match one of
# the two alternatives; matching is case-insensitive.
USER_REGEX = re.compile(
    # `dot-atom` defined in RFC 5322 Section 3.2.3.
    r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*\Z"
    # `quoted-string` defined in RFC 5322 Section 3.2.4.
    r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"\Z)',
    re.IGNORECASE
)
# Variant of the user-part pattern that additionally accepts non-ASCII
# characters (U+0080 through U+10FFFF) inside a dot-atom.
UTF8_USER_REGEX = re.compile(
    # RFC 6531 Section 3.3 extends `atext` (used by dot-atom) to include
    # `UTF8-non-ascii`.
    # This fragment is a plain (non-raw) unicode literal so that the
    # \u / \U escapes are expanded by the Python parser on both Python 2
    # and Python 3 (the `ur` prefix is a SyntaxError on Python 3); the
    # regex backslashes are therefore doubled.
    u"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z\u0080-\U0010FFFF]+(\\.[-!#$%&'*+/=?^_`{}|~0-9A-Z\u0080-\U0010FFFF]+)*\\Z"
    # `quoted-string`
    # No \u escapes here, so a raw literal keeps the octal escapes for `re`.
    r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"\Z)',
    re.IGNORECASE | re.UNICODE
)

# Pattern for a dotted domain name: one or more labels followed by a dot,
# then a final label of 2-63 characters that does not end with a hyphen.
DOMAIN_REGEX = re.compile(
    r'((?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+)(?:[A-Z0-9-]{2,63}(?<!-))\Z',
    re.IGNORECASE
)

# Domains accepted without any further validation.
domain_whitelist = ['localhost']

# Message template passed to self.error(); %s receives the rejected value.
error_msg = u'Invalid email address: %s'
def __init__(self, domain_whitelist=None, allow_utf8_user=False, *args,
             **kwargs):
    """Set up the email-specific options, then defer to the parent field.

    Args:
        domain_whitelist (list) - domain names that would otherwise be
                                  rejected but should be accepted.
                                  When omitted, the class-level default
                                  (which contains "localhost") is used.
        allow_utf8_user (bool) - whether the user part of the address
                                 may contain UTF8 characters.
                                 Defaults to False.

    Any remaining positional and keyword arguments are forwarded
    unchanged to the parent class initializer.
    """
    # NOTE(review): these two parameters precede *args, so positional
    # arguments meant for the parent class would land here instead --
    # confirm that callers pass parent options by keyword.
    self.allow_utf8_user = allow_utf8_user

    # Only shadow the class-level whitelist when one was supplied.
    if domain_whitelist is not None:
        self.domain_whitelist = domain_whitelist

    super(EmailField, self).__init__(*args, **kwargs)
def validate_user_part(self, user_part):
    """Validate the user part of the email address.

    Uses UTF8_USER_REGEX when ``self.allow_utf8_user`` is set and
    USER_REGEX otherwise.

    Args:
        user_part - the text preceding the last "@" of the address.

    Returns:
        True if the user part matches the selected pattern, False
        otherwise (a real bool rather than a match object / None).
    """
    if self.allow_utf8_user:
        pattern = self.UTF8_USER_REGEX
    else:
        pattern = self.USER_REGEX
    return pattern.match(user_part) is not None
def validate_domain_part(self, domain_part):
    """Validate the domain part of the email address.

    A domain is accepted when it is listed in ``self.domain_whitelist``,
    when it matches DOMAIN_REGEX, or when it is a bracketed IPv4/IPv6
    literal such as ``[192.168.0.1]``.

    Args:
        domain_part - the text following the last "@" of the address.

    Returns:
        True if the domain is valid and False otherwise. An empty
        domain is reported as invalid instead of raising IndexError.
    """
    # Skip domain validation if it's in the whitelist.
    if domain_part in self.domain_whitelist:
        return True

    if self.DOMAIN_REGEX.match(domain_part):
        return True

    # Validate IPv4/IPv6, e.g. user@[192.168.0.1]
    # startswith/endswith are safe on an empty string, unlike indexing.
    if domain_part.startswith('[') and domain_part.endswith(']'):
        address = domain_part[1:-1]
        for addr_family in (socket.AF_INET, socket.AF_INET6):
            try:
                # Only success/failure matters; the packed bytes that
                # inet_pton returns must not leak out as the result.
                socket.inet_pton(addr_family, address)
            except (socket.error, ValueError):
                # socket.error: not a valid address for this family.
                # ValueError also covers UnicodeEncodeError (its
                # subclass) for text that cannot be passed through.
                continue
            return True

    return False
def validate(self, value):
    """Validate ``value`` as an email address.

    Runs the parent class validation first, then checks the user part
    and the domain part separately. A domain that fails validation is
    retried once after IDNA-encoding it, so that internationalized
    domain names can be accepted.

    Every failure is reported through ``self.error`` with
    ``self.error_msg`` formatted with the offending value.
    """
    super(EmailField, self).validate(value)

    # NOTE(review): the code below relies on self.error() raising;
    # if it returned, the unpacking of rsplit() would fail on a value
    # without "@" -- confirm against the base field implementation.
    if '@' not in value:
        self.error(self.error_msg % value)

    # Split on the LAST "@": everything before it is the user part.
    user_part, domain_part = value.rsplit('@', 1)

    # Validate the user part.
    if not self.validate_user_part(user_part):
        self.error(self.error_msg % value)

    # Validate the domain and, if invalid, see if it's IDN-encoded.
    if not self.validate_domain_part(domain_part):
        try:
            domain_part = domain_part.encode('idna').decode('ascii')
        except UnicodeError:
            self.error(self.error_msg % value)
        else:
            if not self.validate_domain_part(domain_part):
                self.error(self.error_msg % value)
class IntField(BaseField): class IntField(BaseField):
"""32-bit integer field.""" """32-bit integer field."""

View File

@ -342,8 +342,6 @@ class FieldTest(MongoDBTestCase):
class Link(Document): class Link(Document):
url = URLField() url = URLField()
Link.drop_collection()
link = Link() link = Link()
link.url = 'google' link.url = 'google'
self.assertRaises(ValidationError, link.validate) self.assertRaises(ValidationError, link.validate)
@ -356,8 +354,6 @@ class FieldTest(MongoDBTestCase):
class Link(Document): class Link(Document):
url = URLField() url = URLField()
Link.drop_collection()
link = Link() link = Link()
link.url = u'http://привет.com' link.url = u'http://привет.com'
@ -3456,25 +3452,57 @@ class FieldTest(MongoDBTestCase):
class User(Document): class User(Document):
email = EmailField() email = EmailField()
user = User(email="ross@example.com") user = User(email='ross@example.com')
self.assertTrue(user.validate() is None) user.validate()
user = User(email="ross@example.co.uk") user = User(email='ross@example.co.uk')
self.assertTrue(user.validate() is None) user.validate()
user = User(email=("Kofq@rhom0e4klgauOhpbpNdogawnyIKvQS0wk2mjqrgGQ5S" user = User(email=('Kofq@rhom0e4klgauOhpbpNdogawnyIKvQS0wk2mjqrgGQ5S'
"aJIazqqWkm7.net")) 'aJIazqqWkm7.net'))
self.assertTrue(user.validate() is None) user.validate()
user = User(email="new-tld@example.technology") user = User(email='new-tld@example.technology')
self.assertTrue(user.validate() is None) user.validate()
user = User(email='ross@example.com.')
self.assertRaises(ValidationError, user.validate)
# localhost should be whitelisted by default
user = User(email='me@localhost') user = User(email='me@localhost')
user.validate()
# valid IPv4 domain
user = User(email='email@[127.0.0.1]')
user.validate()
# valid IPv6 domain
user = User(email='email@[2001:dB8::1]')
user.validate()
# invalid IP
user = User(email='email@[324.0.0.1]')
self.assertRaises(ValidationError, user.validate) self.assertRaises(ValidationError, user.validate)
user = User(email="ross@example.com.") # unicode domain
user = User(email=u'user@пример.рф')
user.validate()
# invalid unicode domain
user = User(email=u'user@пример')
self.assertRaises(ValidationError, user.validate) self.assertRaises(ValidationError, user.validate)
# unicode user shouldn't validate by default...
user = User(email=u'Dörte@Sörensen.example.com')
self.assertRaises(ValidationError, user.validate)
# ...but it should be fine with allow_utf8_user set to True
class User(Document):
email = EmailField(allow_utf8_user=True)
user = User(email=u'Dörte@Sörensen.example.com')
user.validate()
def test_email_field_honors_regex(self): def test_email_field_honors_regex(self):
class User(Document): class User(Document):
email = EmailField(regex=r'\w+@example.com') email = EmailField(regex=r'\w+@example.com')