Compare commits
3 Commits
unicode-em
...
fix-hash
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2579e0b840 | ||
|
|
824ec42005 | ||
|
|
466935e9a3 |
@@ -6,6 +6,11 @@ Development
|
|||||||
===========
|
===========
|
||||||
- (Fill this out as you fix issues and develop your features).
|
- (Fill this out as you fix issues and develop your features).
|
||||||
|
|
||||||
|
Changes in 0.13.0
|
||||||
|
=================
|
||||||
|
- POTENTIAL BREAKING CHANGE: Added Unicode support to the `EmailField`, see
|
||||||
|
docs/upgrade.rst for details.
|
||||||
|
|
||||||
Changes in 0.12.0
|
Changes in 0.12.0
|
||||||
=================
|
=================
|
||||||
- POTENTIAL BREAKING CHANGE: Fixed limit/skip/hint/batch_size chaining #1476
|
- POTENTIAL BREAKING CHANGE: Fixed limit/skip/hint/batch_size chaining #1476
|
||||||
|
|||||||
@@ -6,6 +6,17 @@ Development
|
|||||||
***********
|
***********
|
||||||
(Fill this out whenever you introduce breaking changes to MongoEngine)
|
(Fill this out whenever you introduce breaking changes to MongoEngine)
|
||||||
|
|
||||||
|
0.13.0
|
||||||
|
******
|
||||||
|
This release adds Unicode support to the `EmailField` and changes its
|
||||||
|
structure significantly. Previously, email addresses containing Unicode
|
||||||
|
characters didn't work at all. Starting with v0.13.0, domains with Unicode
|
||||||
|
characters are supported out of the box, meaning some emails that previously
|
||||||
|
didn't pass validation now do. Make sure the rest of your application can
|
||||||
|
accept such email addresses. Additionally, if you subclassed the `EmailField`
|
||||||
|
in your application and overrode `EmailField.EMAIL_REGEX`, you will have to
|
||||||
|
adjust your code to override `EmailField.USER_REGEX`, `EmailField.DOMAIN_REGEX`,
|
||||||
|
and potentially `EmailField.UTF8_USER_REGEX`.
|
||||||
|
|
||||||
0.12.0
|
0.12.0
|
||||||
******
|
******
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ __all__ = (list(document.__all__) + list(fields.__all__) +
|
|||||||
list(signals.__all__) + list(errors.__all__))
|
list(signals.__all__) + list(errors.__all__))
|
||||||
|
|
||||||
|
|
||||||
VERSION = (0, 12, 0)
|
VERSION = (0, 13, 0)
|
||||||
|
|
||||||
|
|
||||||
def get_version():
|
def get_version():
|
||||||
|
|||||||
@@ -272,13 +272,6 @@ class BaseDocument(object):
|
|||||||
def __ne__(self, other):
|
def __ne__(self, other):
|
||||||
return not self.__eq__(other)
|
return not self.__eq__(other)
|
||||||
|
|
||||||
def __hash__(self):
|
|
||||||
if getattr(self, 'pk', None) is None:
|
|
||||||
# For new object
|
|
||||||
return super(BaseDocument, self).__hash__()
|
|
||||||
else:
|
|
||||||
return hash(self.pk)
|
|
||||||
|
|
||||||
def clean(self):
|
def clean(self):
|
||||||
"""
|
"""
|
||||||
Hook for doing document level data cleaning before validation is run.
|
Hook for doing document level data cleaning before validation is run.
|
||||||
|
|||||||
@@ -60,6 +60,12 @@ class EmbeddedDocument(BaseDocument):
|
|||||||
my_metaclass = DocumentMetaclass
|
my_metaclass = DocumentMetaclass
|
||||||
__metaclass__ = DocumentMetaclass
|
__metaclass__ = DocumentMetaclass
|
||||||
|
|
||||||
|
# A generic embedded document doesn't have any immutable properties
|
||||||
|
# that describe it uniquely, hence it shouldn't be hashable. You can
|
||||||
|
# define your own __hash__ method on a subclass if you need your
|
||||||
|
# embedded documents to be hashable.
|
||||||
|
__hash__ = None
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super(EmbeddedDocument, self).__init__(*args, **kwargs)
|
super(EmbeddedDocument, self).__init__(*args, **kwargs)
|
||||||
self._instance = None
|
self._instance = None
|
||||||
@@ -160,6 +166,15 @@ class Document(BaseDocument):
|
|||||||
"""Set the primary key."""
|
"""Set the primary key."""
|
||||||
return setattr(self, self._meta['id_field'], value)
|
return setattr(self, self._meta['id_field'], value)
|
||||||
|
|
||||||
|
def __hash__(self):
|
||||||
|
"""Return the hash based on the PK of this document. If it's new
|
||||||
|
and doesn't have a PK yet, return the default object hash instead.
|
||||||
|
"""
|
||||||
|
if self.pk is None:
|
||||||
|
return super(BaseDocument, self).__hash__()
|
||||||
|
else:
|
||||||
|
return hash(self.pk)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _get_db(cls):
|
def _get_db(cls):
|
||||||
"""Some Model using other db_alias"""
|
"""Some Model using other db_alias"""
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import datetime
|
|||||||
import decimal
|
import decimal
|
||||||
import itertools
|
import itertools
|
||||||
import re
|
import re
|
||||||
|
import socket
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
import warnings
|
import warnings
|
||||||
@@ -154,21 +155,105 @@ class EmailField(StringField):
|
|||||||
|
|
||||||
.. versionadded:: 0.4
|
.. versionadded:: 0.4
|
||||||
"""
|
"""
|
||||||
|
USER_REGEX = re.compile(
|
||||||
EMAIL_REGEX = re.compile(
|
# `dot-atom` defined in RFC 5322 Section 3.2.3.
|
||||||
# dot-atom
|
r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*\Z"
|
||||||
r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*"
|
# `quoted-string` defined in RFC 5322 Section 3.2.4.
|
||||||
# quoted-string
|
r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"\Z)',
|
||||||
r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-011\013\014\016-\177])*"'
|
re.IGNORECASE
|
||||||
# domain (max length of an ICAAN TLD is 22 characters)
|
|
||||||
r')@(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}|[A-Z0-9-]{2,}(?<!-))$', re.IGNORECASE
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
UTF8_USER_REGEX = re.compile(
|
||||||
|
six.u(
|
||||||
|
# RFC 6531 Section 3.3 extends `atext` (used by dot-atom) to
|
||||||
|
# include `UTF8-non-ascii`.
|
||||||
|
r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z\u0080-\U0010FFFF]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z\u0080-\U0010FFFF]+)*\Z"
|
||||||
|
# `quoted-string`
|
||||||
|
r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"\Z)'
|
||||||
|
), re.IGNORECASE | re.UNICODE
|
||||||
|
)
|
||||||
|
|
||||||
|
DOMAIN_REGEX = re.compile(
|
||||||
|
r'((?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+)(?:[A-Z0-9-]{2,63}(?<!-))\Z',
|
||||||
|
re.IGNORECASE
|
||||||
|
)
|
||||||
|
|
||||||
|
error_msg = u'Invalid email address: %s'
|
||||||
|
|
||||||
|
def __init__(self, domain_whitelist=None, allow_utf8_user=False,
|
||||||
|
allow_ip_domain=False, *args, **kwargs):
|
||||||
|
"""Initialize the EmailField.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
domain_whitelist (list) - list of otherwise invalid domain
|
||||||
|
names which you'd like to support.
|
||||||
|
allow_utf8_user (bool) - if True, the user part of the email
|
||||||
|
address can contain UTF8 characters.
|
||||||
|
False by default.
|
||||||
|
allow_ip_domain (bool) - if True, the domain part of the email
|
||||||
|
can be a valid IPv4 or IPv6 address.
|
||||||
|
"""
|
||||||
|
self.domain_whitelist = domain_whitelist or []
|
||||||
|
self.allow_utf8_user = allow_utf8_user
|
||||||
|
self.allow_ip_domain = allow_ip_domain
|
||||||
|
super(EmailField, self).__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
def validate_user_part(self, user_part):
|
||||||
|
"""Validate the user part of the email address. Return True if
|
||||||
|
valid and False otherwise (strictly, a truthy regex match object or None).
|
||||||
|
"""
|
||||||
|
if self.allow_utf8_user:
|
||||||
|
return self.UTF8_USER_REGEX.match(user_part)
|
||||||
|
return self.USER_REGEX.match(user_part)
|
||||||
|
|
||||||
|
def validate_domain_part(self, domain_part):
|
||||||
|
"""Validate the domain part of the email address. Return True if
|
||||||
|
valid and False otherwise.
|
||||||
|
"""
|
||||||
|
# Skip domain validation if it's in the whitelist.
|
||||||
|
if domain_part in self.domain_whitelist:
|
||||||
|
return True
|
||||||
|
|
||||||
|
if self.DOMAIN_REGEX.match(domain_part):
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Validate IPv4/IPv6, e.g. user@[192.168.0.1]
|
||||||
|
if (
|
||||||
|
self.allow_ip_domain and
|
||||||
|
domain_part[0] == '[' and
|
||||||
|
domain_part[-1] == ']'
|
||||||
|
):
|
||||||
|
for addr_family in (socket.AF_INET, socket.AF_INET6):
|
||||||
|
try:
|
||||||
|
socket.inet_pton(addr_family, domain_part[1:-1])
|
||||||
|
return True
|
||||||
|
except (socket.error, UnicodeEncodeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
def validate(self, value):
|
def validate(self, value):
|
||||||
if not EmailField.EMAIL_REGEX.match(value):
|
|
||||||
self.error('Invalid email address: %s' % value)
|
|
||||||
super(EmailField, self).validate(value)
|
super(EmailField, self).validate(value)
|
||||||
|
|
||||||
|
if '@' not in value:
|
||||||
|
self.error(self.error_msg % value)
|
||||||
|
|
||||||
|
user_part, domain_part = value.rsplit('@', 1)
|
||||||
|
|
||||||
|
# Validate the user part.
|
||||||
|
if not self.validate_user_part(user_part):
|
||||||
|
self.error(self.error_msg % value)
|
||||||
|
|
||||||
|
# Validate the domain and, if invalid, retry after IDNA-encoding it.
|
||||||
|
if not self.validate_domain_part(domain_part):
|
||||||
|
try:
|
||||||
|
domain_part = domain_part.encode('idna').decode('ascii')
|
||||||
|
except UnicodeError:
|
||||||
|
self.error(self.error_msg % value)
|
||||||
|
else:
|
||||||
|
if not self.validate_domain_part(domain_part):
|
||||||
|
self.error(self.error_msg % value)
|
||||||
|
|
||||||
|
|
||||||
class IntField(BaseField):
|
class IntField(BaseField):
|
||||||
"""32-bit integer field."""
|
"""32-bit integer field."""
|
||||||
|
|||||||
@@ -844,7 +844,7 @@ class InstanceTest(unittest.TestCase):
|
|||||||
class Recipient(Document):
|
class Recipient(Document):
|
||||||
email = EmailField(required=True)
|
email = EmailField(required=True)
|
||||||
|
|
||||||
recipient = Recipient(email='root@localhost')
|
recipient = Recipient(email='not-an-email')
|
||||||
self.assertRaises(ValidationError, recipient.save)
|
self.assertRaises(ValidationError, recipient.save)
|
||||||
recipient.save(validate=False)
|
recipient.save(validate=False)
|
||||||
|
|
||||||
@@ -2164,7 +2164,7 @@ class InstanceTest(unittest.TestCase):
|
|||||||
class BlogPost(Document):
|
class BlogPost(Document):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Clear old datas
|
# Clear old data
|
||||||
User.drop_collection()
|
User.drop_collection()
|
||||||
BlogPost.drop_collection()
|
BlogPost.drop_collection()
|
||||||
|
|
||||||
@@ -2176,17 +2176,18 @@ class InstanceTest(unittest.TestCase):
|
|||||||
b1 = BlogPost.objects.create()
|
b1 = BlogPost.objects.create()
|
||||||
b2 = BlogPost.objects.create()
|
b2 = BlogPost.objects.create()
|
||||||
|
|
||||||
# in List
|
# Make sure docs are properly identified in a list (__eq__ is used
|
||||||
|
# for the comparison).
|
||||||
all_user_list = list(User.objects.all())
|
all_user_list = list(User.objects.all())
|
||||||
|
|
||||||
self.assertTrue(u1 in all_user_list)
|
self.assertTrue(u1 in all_user_list)
|
||||||
self.assertTrue(u2 in all_user_list)
|
self.assertTrue(u2 in all_user_list)
|
||||||
self.assertTrue(u3 in all_user_list)
|
self.assertTrue(u3 in all_user_list)
|
||||||
self.assertFalse(u4 in all_user_list) # New object
|
self.assertTrue(u4 not in all_user_list) # New object
|
||||||
self.assertFalse(b1 in all_user_list) # Other object
|
self.assertTrue(b1 not in all_user_list) # Other object
|
||||||
self.assertFalse(b2 in all_user_list) # Other object
|
self.assertTrue(b2 not in all_user_list) # Other object
|
||||||
|
|
||||||
# in Dict
|
# Make sure docs can be used as keys in a dict (__hash__ is used
|
||||||
|
# for hashing the docs).
|
||||||
all_user_dic = {}
|
all_user_dic = {}
|
||||||
for u in User.objects.all():
|
for u in User.objects.all():
|
||||||
all_user_dic[u] = "OK"
|
all_user_dic[u] = "OK"
|
||||||
@@ -2198,9 +2199,20 @@ class InstanceTest(unittest.TestCase):
|
|||||||
self.assertEqual(all_user_dic.get(b1, False), False) # Other object
|
self.assertEqual(all_user_dic.get(b1, False), False) # Other object
|
||||||
self.assertEqual(all_user_dic.get(b2, False), False) # Other object
|
self.assertEqual(all_user_dic.get(b2, False), False) # Other object
|
||||||
|
|
||||||
# in Set
|
# Make sure docs are properly identified in a set (__hash__ is used
|
||||||
|
# for hashing the docs).
|
||||||
all_user_set = set(User.objects.all())
|
all_user_set = set(User.objects.all())
|
||||||
self.assertTrue(u1 in all_user_set)
|
self.assertTrue(u1 in all_user_set)
|
||||||
|
self.assertTrue(u4 not in all_user_set)
|
||||||
|
self.assertTrue(b1 not in all_user_list)
|
||||||
|
self.assertTrue(b2 not in all_user_list)
|
||||||
|
|
||||||
|
# Make sure duplicate docs aren't accepted in the set
|
||||||
|
self.assertEqual(len(all_user_set), 3)
|
||||||
|
all_user_set.add(u1)
|
||||||
|
all_user_set.add(u2)
|
||||||
|
all_user_set.add(u3)
|
||||||
|
self.assertEqual(len(all_user_set), 3)
|
||||||
|
|
||||||
def test_picklable(self):
|
def test_picklable(self):
|
||||||
pickle_doc = PickleTest(number=1, string="One", lists=['1', '2'])
|
pickle_doc = PickleTest(number=1, string="One", lists=['1', '2'])
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import math
|
|||||||
import itertools
|
import itertools
|
||||||
import re
|
import re
|
||||||
import pymongo
|
import pymongo
|
||||||
|
import sys
|
||||||
|
|
||||||
from nose.plugins.skip import SkipTest
|
from nose.plugins.skip import SkipTest
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
@@ -342,8 +343,6 @@ class FieldTest(MongoDBTestCase):
|
|||||||
class Link(Document):
|
class Link(Document):
|
||||||
url = URLField()
|
url = URLField()
|
||||||
|
|
||||||
Link.drop_collection()
|
|
||||||
|
|
||||||
link = Link()
|
link = Link()
|
||||||
link.url = 'google'
|
link.url = 'google'
|
||||||
self.assertRaises(ValidationError, link.validate)
|
self.assertRaises(ValidationError, link.validate)
|
||||||
@@ -356,8 +355,6 @@ class FieldTest(MongoDBTestCase):
|
|||||||
class Link(Document):
|
class Link(Document):
|
||||||
url = URLField()
|
url = URLField()
|
||||||
|
|
||||||
Link.drop_collection()
|
|
||||||
|
|
||||||
link = Link()
|
link = Link()
|
||||||
link.url = u'http://привет.com'
|
link.url = u'http://привет.com'
|
||||||
|
|
||||||
@@ -3456,23 +3453,99 @@ class FieldTest(MongoDBTestCase):
|
|||||||
class User(Document):
|
class User(Document):
|
||||||
email = EmailField()
|
email = EmailField()
|
||||||
|
|
||||||
user = User(email="ross@example.com")
|
user = User(email='ross@example.com')
|
||||||
self.assertTrue(user.validate() is None)
|
user.validate()
|
||||||
|
|
||||||
user = User(email="ross@example.co.uk")
|
user = User(email='ross@example.co.uk')
|
||||||
self.assertTrue(user.validate() is None)
|
user.validate()
|
||||||
|
|
||||||
user = User(email=("Kofq@rhom0e4klgauOhpbpNdogawnyIKvQS0wk2mjqrgGQ5S"
|
user = User(email=('Kofq@rhom0e4klgauOhpbpNdogawnyIKvQS0wk2mjqrgGQ5S'
|
||||||
"aJIazqqWkm7.net"))
|
'aJIazqqWkm7.net'))
|
||||||
self.assertTrue(user.validate() is None)
|
user.validate()
|
||||||
|
|
||||||
user = User(email="new-tld@example.technology")
|
user = User(email='new-tld@example.technology')
|
||||||
self.assertTrue(user.validate() is None)
|
user.validate()
|
||||||
|
|
||||||
|
user = User(email='ross@example.com.')
|
||||||
|
self.assertRaises(ValidationError, user.validate)
|
||||||
|
|
||||||
|
# unicode domain
|
||||||
|
user = User(email=u'user@пример.рф')
|
||||||
|
user.validate()
|
||||||
|
|
||||||
|
# invalid unicode domain
|
||||||
|
user = User(email=u'user@пример')
|
||||||
|
self.assertRaises(ValidationError, user.validate)
|
||||||
|
|
||||||
|
# invalid data type
|
||||||
|
user = User(email=123)
|
||||||
|
self.assertRaises(ValidationError, user.validate)
|
||||||
|
|
||||||
|
def test_email_field_unicode_user(self):
|
||||||
|
# Don't run this test on pypy3 (detected below as Python 3.2), which doesn't support unicode regex:
|
||||||
|
# https://bitbucket.org/pypy/pypy/issues/1821/regular-expression-doesnt-find-unicode
|
||||||
|
if sys.version_info[:2] == (3, 2):
|
||||||
|
raise SkipTest('unicode email addresses are not supported on PyPy 3')
|
||||||
|
|
||||||
|
class User(Document):
|
||||||
|
email = EmailField()
|
||||||
|
|
||||||
|
# unicode user shouldn't validate by default...
|
||||||
|
user = User(email=u'Dörte@Sörensen.example.com')
|
||||||
|
self.assertRaises(ValidationError, user.validate)
|
||||||
|
|
||||||
|
# ...but it should be fine with allow_utf8_user set to True
|
||||||
|
class User(Document):
|
||||||
|
email = EmailField(allow_utf8_user=True)
|
||||||
|
|
||||||
|
user = User(email=u'Dörte@Sörensen.example.com')
|
||||||
|
user.validate()
|
||||||
|
|
||||||
|
def test_email_field_domain_whitelist(self):
|
||||||
|
class User(Document):
|
||||||
|
email = EmailField()
|
||||||
|
|
||||||
|
# localhost domain shouldn't validate by default...
|
||||||
user = User(email='me@localhost')
|
user = User(email='me@localhost')
|
||||||
self.assertRaises(ValidationError, user.validate)
|
self.assertRaises(ValidationError, user.validate)
|
||||||
|
|
||||||
user = User(email="ross@example.com.")
|
# ...but it should be fine if it's whitelisted
|
||||||
|
class User(Document):
|
||||||
|
email = EmailField(domain_whitelist=['localhost'])
|
||||||
|
|
||||||
|
user = User(email='me@localhost')
|
||||||
|
user.validate()
|
||||||
|
|
||||||
|
def test_email_field_ip_domain(self):
|
||||||
|
class User(Document):
|
||||||
|
email = EmailField()
|
||||||
|
|
||||||
|
valid_ipv4 = 'email@[127.0.0.1]'
|
||||||
|
valid_ipv6 = 'email@[2001:dB8::1]'
|
||||||
|
invalid_ip = 'email@[324.0.0.1]'
|
||||||
|
|
||||||
|
# IP address as a domain shouldn't validate by default...
|
||||||
|
user = User(email=valid_ipv4)
|
||||||
|
self.assertRaises(ValidationError, user.validate)
|
||||||
|
|
||||||
|
user = User(email=valid_ipv6)
|
||||||
|
self.assertRaises(ValidationError, user.validate)
|
||||||
|
|
||||||
|
user = User(email=invalid_ip)
|
||||||
|
self.assertRaises(ValidationError, user.validate)
|
||||||
|
|
||||||
|
# ...but it should be fine with allow_ip_domain set to True
|
||||||
|
class User(Document):
|
||||||
|
email = EmailField(allow_ip_domain=True)
|
||||||
|
|
||||||
|
user = User(email=valid_ipv4)
|
||||||
|
user.validate()
|
||||||
|
|
||||||
|
user = User(email=valid_ipv6)
|
||||||
|
user.validate()
|
||||||
|
|
||||||
|
# invalid IP should still fail validation
|
||||||
|
user = User(email=invalid_ip)
|
||||||
self.assertRaises(ValidationError, user.validate)
|
self.assertRaises(ValidationError, user.validate)
|
||||||
|
|
||||||
def test_email_field_honors_regex(self):
|
def test_email_field_honors_regex(self):
|
||||||
|
|||||||
Reference in New Issue
Block a user