Compare commits

...

5 Commits

Author SHA1 Message Date
Stefan Wojcik
3c8e1e5336 clean up the Document._get_collection code 2017-04-30 14:11:03 -04:00
Artemiy
bd4a603e16 Use De Morgan's laws to simplify an expression. (#1531) 2017-04-20 10:27:37 -04:00
Jason
358b80d782 Make the tutorial slightly more intuitive (#1530) 2017-04-20 09:11:38 -04:00
Stefan Wojcik
824ec42005 bump version to v0.13.0 and fill in the changelog and the upgrade docs 2017-04-16 14:08:46 -04:00
Stefan Wójcik
466935e9a3 Unicode support in EmailField (#1527) 2017-04-16 13:58:58 -04:00
8 changed files with 256 additions and 61 deletions

View File

@@ -6,6 +6,11 @@ Development
===========
- (Fill this out as you fix issues and develop your features).
Changes in 0.13.0
=================
- POTENTIAL BREAKING CHANGE: Added Unicode support to the `EmailField`, see
docs/upgrade.rst for details.
Changes in 0.12.0
=================
- POTENTIAL BREAKING CHANGE: Fixed limit/skip/hint/batch_size chaining #1476

View File

@@ -206,7 +206,10 @@ object::
ross.last_name = 'Lawley'
ross.save()
Now that we've got our user in the database, let's add a couple of posts::
Assign another user to a variable called ``john``, just like we did above with
``ross``.
Now that we've got our users in the database, let's add a couple of posts::
post1 = TextPost(title='Fun with MongoEngine', author=john)
post1.content = 'Took a look at MongoEngine today, looks pretty cool.'

View File

@@ -6,6 +6,17 @@ Development
***********
(Fill this out whenever you introduce breaking changes to MongoEngine)
0.13.0
******
This release adds Unicode support to the `EmailField` and changes its
structure significantly. Previously, email addresses containing Unicode
characters didn't work at all. Starting with v0.13.0, domains with Unicode
characters are supported out of the box, meaning some emails that previously
didn't pass validation now do. Make sure the rest of your application can
accept such email addresses. Additionally, if you subclassed the `EmailField`
in your application and overrode `EmailField.EMAIL_REGEX`, you will have to
adjust your code to override `EmailField.USER_REGEX`, `EmailField.DOMAIN_REGEX`,
and potentially `EmailField.UTF8_USER_REGEX`.
0.12.0
******

View File

@@ -23,7 +23,7 @@ __all__ = (list(document.__all__) + list(fields.__all__) +
list(signals.__all__) + list(errors.__all__))
VERSION = (0, 12, 0)
VERSION = (0, 13, 0)
def get_version():

View File

@@ -167,45 +167,63 @@ class Document(BaseDocument):
@classmethod
def _get_collection(cls):
"""Returns the collection for the document."""
# TODO: use new get_collection() with PyMongo3 ?
"""Return a PyMongo collection for the document."""
if not hasattr(cls, '_collection') or cls._collection is None:
db = cls._get_db()
collection_name = cls._get_collection_name()
# Create collection as a capped collection if specified
if cls._meta.get('max_size') or cls._meta.get('max_documents'):
# Get max document limit and max byte size from meta
max_size = cls._meta.get('max_size') or 10 * 2 ** 20 # 10MB default
max_documents = cls._meta.get('max_documents')
# Round up to next 256 bytes as MongoDB would do it to avoid exception
if max_size % 256:
max_size = (max_size // 256 + 1) * 256
if collection_name in db.collection_names():
cls._collection = db[collection_name]
# The collection already exists, check if its capped
# options match the specified capped options
options = cls._collection.options()
if options.get('max') != max_documents or \
options.get('size') != max_size:
msg = (('Cannot create collection "%s" as a capped '
'collection as it already exists')
% cls._collection)
raise InvalidCollectionError(msg)
else:
# Create the collection as a capped collection
opts = {'capped': True, 'size': max_size}
if max_documents:
opts['max'] = max_documents
cls._collection = db.create_collection(
collection_name, **opts
)
# Get the collection, either capped or regular.
if cls._meta.get('max_size') or cls._meta.get('max_documents'):
cls._collection = cls._get_capped_collection()
else:
db = cls._get_db()
collection_name = cls._get_collection_name()
cls._collection = db[collection_name]
# Ensure indexes on the collection unless auto_create_index was
# set to False.
if cls._meta.get('auto_create_index', True):
cls.ensure_indexes()
return cls._collection
@classmethod
def _get_capped_collection(cls):
"""Create a new or get an existing capped PyMongo collection."""
db = cls._get_db()
collection_name = cls._get_collection_name()
# Get max document limit and max byte size from meta.
max_size = cls._meta.get('max_size') or 10 * 2 ** 20 # 10MB default
max_documents = cls._meta.get('max_documents')
# MongoDB will automatically raise the size to make it a multiple of
# 256 bytes. We raise it here ourselves to be able to reliably compare
# the options below.
if max_size % 256:
max_size = (max_size // 256 + 1) * 256
# If the collection already exists and has different options
# (i.e. isn't capped or has different max/size), raise an error.
if collection_name in db.collection_names():
collection = db[collection_name]
options = collection.options()
if (
options.get('max') != max_documents or
options.get('size') != max_size
):
raise InvalidCollectionError(
'Cannot create collection "{}" as a capped '
'collection as it already exists'.format(cls._collection)
)
return collection
# Create a new capped collection.
opts = {'capped': True, 'size': max_size}
if max_documents:
opts['max'] = max_documents
return db.create_collection(collection_name, **opts)
def to_mongo(self, *args, **kwargs):
data = super(Document, self).to_mongo(*args, **kwargs)

View File

@@ -2,6 +2,7 @@ import datetime
import decimal
import itertools
import re
import socket
import time
import uuid
import warnings
@@ -154,21 +155,105 @@ class EmailField(StringField):
.. versionadded:: 0.4
"""
EMAIL_REGEX = re.compile(
# dot-atom
r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*"
# quoted-string
r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-011\013\014\016-\177])*"'
# domain (max length of an ICAAN TLD is 22 characters)
r')@(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}|[A-Z0-9-]{2,}(?<!-))$', re.IGNORECASE
USER_REGEX = re.compile(
# `dot-atom` defined in RFC 5322 Section 3.2.3.
r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*\Z"
# `quoted-string` defined in RFC 5322 Section 3.2.4.
r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"\Z)',
re.IGNORECASE
)
UTF8_USER_REGEX = re.compile(
six.u(
# RFC 6531 Section 3.3 extends `atext` (used by dot-atom) to
# include `UTF8-non-ascii`.
r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z\u0080-\U0010FFFF]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z\u0080-\U0010FFFF]+)*\Z"
# `quoted-string`
r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"\Z)'
), re.IGNORECASE | re.UNICODE
)
DOMAIN_REGEX = re.compile(
r'((?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+)(?:[A-Z0-9-]{2,63}(?<!-))\Z',
re.IGNORECASE
)
error_msg = u'Invalid email address: %s'
def __init__(self, domain_whitelist=None, allow_utf8_user=False,
allow_ip_domain=False, *args, **kwargs):
"""Initialize the EmailField.
Args:
domain_whitelist (list) - list of otherwise invalid domain
names which you'd like to support.
allow_utf8_user (bool) - if True, the user part of the email
address can contain UTF8 characters.
False by default.
allow_ip_domain (bool) - if True, the domain part of the email
can be a valid IPv4 or IPv6 address.
"""
self.domain_whitelist = domain_whitelist or []
self.allow_utf8_user = allow_utf8_user
self.allow_ip_domain = allow_ip_domain
super(EmailField, self).__init__(*args, **kwargs)
def validate_user_part(self, user_part):
"""Validate the user part of the email address. Return True if
valid and False otherwise.
"""
if self.allow_utf8_user:
return self.UTF8_USER_REGEX.match(user_part)
return self.USER_REGEX.match(user_part)
def validate_domain_part(self, domain_part):
"""Validate the domain part of the email address. Return True if
valid and False otherwise.
"""
# Skip domain validation if it's in the whitelist.
if domain_part in self.domain_whitelist:
return True
if self.DOMAIN_REGEX.match(domain_part):
return True
# Validate IPv4/IPv6, e.g. user@[192.168.0.1]
if (
self.allow_ip_domain and
domain_part[0] == '[' and
domain_part[-1] == ']'
):
for addr_family in (socket.AF_INET, socket.AF_INET6):
try:
socket.inet_pton(addr_family, domain_part[1:-1])
return True
except (socket.error, UnicodeEncodeError):
pass
return False
def validate(self, value):
if not EmailField.EMAIL_REGEX.match(value):
self.error('Invalid email address: %s' % value)
super(EmailField, self).validate(value)
if '@' not in value:
self.error(self.error_msg % value)
user_part, domain_part = value.rsplit('@', 1)
# Validate the user part.
if not self.validate_user_part(user_part):
self.error(self.error_msg % value)
# Validate the domain and, if invalid, see if it's IDN-encoded.
if not self.validate_domain_part(domain_part):
try:
domain_part = domain_part.encode('idna').decode('ascii')
except UnicodeError:
self.error(self.error_msg % value)
else:
if not self.validate_domain_part(domain_part):
self.error(self.error_msg % value)
class IntField(BaseField):
"""32-bit integer field."""
@@ -523,9 +608,9 @@ class EmbeddedDocumentField(BaseField):
"""
def __init__(self, document_type, **kwargs):
if (
not isinstance(document_type, six.string_types) and
not issubclass(document_type, EmbeddedDocument)
if not (
isinstance(document_type, six.string_types) or
issubclass(document_type, EmbeddedDocument)
):
self.error('Invalid embedded document class provided to an '
'EmbeddedDocumentField')

View File

@@ -844,7 +844,7 @@ class InstanceTest(unittest.TestCase):
class Recipient(Document):
email = EmailField(required=True)
recipient = Recipient(email='root@localhost')
recipient = Recipient(email='not-an-email')
self.assertRaises(ValidationError, recipient.save)
recipient.save(validate=False)

View File

@@ -6,6 +6,7 @@ import math
import itertools
import re
import pymongo
import sys
from nose.plugins.skip import SkipTest
from collections import OrderedDict
@@ -342,8 +343,6 @@ class FieldTest(MongoDBTestCase):
class Link(Document):
url = URLField()
Link.drop_collection()
link = Link()
link.url = 'google'
self.assertRaises(ValidationError, link.validate)
@@ -356,8 +355,6 @@ class FieldTest(MongoDBTestCase):
class Link(Document):
url = URLField()
Link.drop_collection()
link = Link()
link.url = u'http://привет.com'
@@ -3456,23 +3453,99 @@ class FieldTest(MongoDBTestCase):
class User(Document):
email = EmailField()
user = User(email="ross@example.com")
self.assertTrue(user.validate() is None)
user = User(email='ross@example.com')
user.validate()
user = User(email="ross@example.co.uk")
self.assertTrue(user.validate() is None)
user = User(email='ross@example.co.uk')
user.validate()
user = User(email=("Kofq@rhom0e4klgauOhpbpNdogawnyIKvQS0wk2mjqrgGQ5S"
"aJIazqqWkm7.net"))
self.assertTrue(user.validate() is None)
user = User(email=('Kofq@rhom0e4klgauOhpbpNdogawnyIKvQS0wk2mjqrgGQ5S'
'aJIazqqWkm7.net'))
user.validate()
user = User(email="new-tld@example.technology")
self.assertTrue(user.validate() is None)
user = User(email='new-tld@example.technology')
user.validate()
user = User(email='ross@example.com.')
self.assertRaises(ValidationError, user.validate)
# unicode domain
user = User(email=u'user@пример.рф')
user.validate()
# invalid unicode domain
user = User(email=u'user@пример')
self.assertRaises(ValidationError, user.validate)
# invalid data type
user = User(email=123)
self.assertRaises(ValidationError, user.validate)
def test_email_field_unicode_user(self):
# Don't run this test on pypy3, which doesn't support unicode regex:
# https://bitbucket.org/pypy/pypy/issues/1821/regular-expression-doesnt-find-unicode
if sys.version_info[:2] == (3, 2):
raise SkipTest('unicode email addresses are not supported on PyPy 3')
class User(Document):
email = EmailField()
# unicode user shouldn't validate by default...
user = User(email=u'Dörte@Sörensen.example.com')
self.assertRaises(ValidationError, user.validate)
# ...but it should be fine with allow_utf8_user set to True
class User(Document):
email = EmailField(allow_utf8_user=True)
user = User(email=u'Dörte@Sörensen.example.com')
user.validate()
def test_email_field_domain_whitelist(self):
class User(Document):
email = EmailField()
# localhost domain shouldn't validate by default...
user = User(email='me@localhost')
self.assertRaises(ValidationError, user.validate)
user = User(email="ross@example.com.")
# ...but it should be fine if it's whitelisted
class User(Document):
email = EmailField(domain_whitelist=['localhost'])
user = User(email='me@localhost')
user.validate()
def test_email_field_ip_domain(self):
class User(Document):
email = EmailField()
valid_ipv4 = 'email@[127.0.0.1]'
valid_ipv6 = 'email@[2001:dB8::1]'
invalid_ip = 'email@[324.0.0.1]'
# IP address as a domain shouldn't validate by default...
user = User(email=valid_ipv4)
self.assertRaises(ValidationError, user.validate)
user = User(email=valid_ipv6)
self.assertRaises(ValidationError, user.validate)
user = User(email=invalid_ip)
self.assertRaises(ValidationError, user.validate)
# ...but it should be fine with allow_ip_domain set to True
class User(Document):
email = EmailField(allow_ip_domain=True)
user = User(email=valid_ipv4)
user.validate()
user = User(email=valid_ipv6)
user.validate()
# invalid IP should still fail validation
user = User(email=invalid_ip)
self.assertRaises(ValidationError, user.validate)
def test_email_field_honors_regex(self):