Implemented lazy regex compiling in Field classes to improve 'import mongoengine' performance

This commit is contained in:
Bastien Gérard 2018-06-12 20:59:12 +02:00
parent 02a557aa67
commit 3d45cdc339
5 changed files with 67 additions and 10 deletions

22
mongoengine/base/utils.py Normal file
View File

@ -0,0 +1,22 @@
import re
class LazyRegexCompiler(object):
    """Descriptor that defers ``re.compile`` until the regex is first read.

    When used as a class attribute, only the pattern and flags are stored at
    class-definition time. The first attribute access (through the owning
    class or one of its instances) compiles the pattern; later accesses
    return the same cached compiled object.

    :param pattern: regular expression source handed to ``re.compile``
    :param flags: ``re`` flags handed to ``re.compile`` (defaults to 0)
    """

    def __init__(self, pattern, flags=0):
        self._pattern = pattern
        self._flags = flags
        # Stays None until the first access triggers compilation.
        self._compiled_regex = None

    @property
    def compiled_regex(self):
        """Return the compiled regex, compiling and caching it on first use."""
        if self._compiled_regex is None:
            self._compiled_regex = re.compile(self._pattern, self._flags)
        return self._compiled_regex

    def __get__(self, obj, objtype=None):
        """Return the compiled regex for both class and instance access.

        ``objtype`` defaults to ``None`` so the descriptor can also be
        invoked directly as ``descriptor.__get__(obj)``.
        """
        return self.compiled_regex

    def __set__(self, instance, value):
        """Reject assignment through an instance of the owning class.

        :raises AttributeError: always
        """
        raise AttributeError("Can not set attribute LazyRegexCompiler")

View File

@ -56,3 +56,4 @@ def _import_class(cls_name):
_class_registry_cache[cls] = getattr(module, cls)
return _class_registry_cache.get(cls_name)

View File

@ -5,7 +5,6 @@ import re
import socket
import time
import uuid
import warnings
from operator import itemgetter
from bson import Binary, DBRef, ObjectId, SON
@ -28,6 +27,7 @@ except ImportError:
from mongoengine.base import (BaseDocument, BaseField, ComplexBaseField,
GeoJsonBaseField, LazyReference, ObjectIdField,
get_document)
from mongoengine.base.utils import LazyRegexCompiler
from mongoengine.common import _import_class
from mongoengine.connection import DEFAULT_CONNECTION_NAME, get_db
from mongoengine.document import Document, EmbeddedDocument
@ -123,7 +123,7 @@ class URLField(StringField):
.. versionadded:: 0.3
"""
_URL_REGEX = re.compile(
_URL_REGEX = LazyRegexCompiler(
r'^(?:[a-z0-9\.\-]*)://' # scheme is validated separately
r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}(?<!-)\.?)|' # domain...
r'localhost|' # localhost...
@ -157,7 +157,7 @@ class EmailField(StringField):
.. versionadded:: 0.4
"""
USER_REGEX = re.compile(
USER_REGEX = LazyRegexCompiler(
# `dot-atom` defined in RFC 5322 Section 3.2.3.
r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*\Z"
# `quoted-string` defined in RFC 5322 Section 3.2.4.
@ -165,7 +165,7 @@ class EmailField(StringField):
re.IGNORECASE
)
UTF8_USER_REGEX = re.compile(
UTF8_USER_REGEX = LazyRegexCompiler(
six.u(
# RFC 6531 Section 3.3 extends `atext` (used by dot-atom) to
# include `UTF8-non-ascii`.
@ -175,7 +175,7 @@ class EmailField(StringField):
), re.IGNORECASE | re.UNICODE
)
DOMAIN_REGEX = re.compile(
DOMAIN_REGEX = LazyRegexCompiler(
r'((?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+)(?:[A-Z0-9-]{2,63}(?<!-))\Z',
re.IGNORECASE
)

View File

@ -6,11 +6,7 @@ import pymongo
import six
# IS_PYMONGO_3 is True when the major version of the installed pymongo
# (first element of pymongo.version_tuple) is 3 or greater, False otherwise.
if pymongo.version_tuple[0] < 3:
IS_PYMONGO_3 = False
else:
IS_PYMONGO_3 = True
IS_PYMONGO_3 = pymongo.version_tuple[0] >= 3
# six.BytesIO resolves to StringIO.StringIO in Py2 and io.BytesIO in Py3.
StringIO = six.BytesIO

38
tests/test_utils.py Normal file
View File

@ -0,0 +1,38 @@
import unittest
import re
from mongoengine.base.utils import LazyRegexCompiler
signal_output = []
class LazyRegexCompilerTest(unittest.TestCase):
    """Behavioural checks for the LazyRegexCompiler descriptor."""

    def test_lazy_regex_compiler_verify_laziness_of_descriptor(self):
        """Nothing is compiled until the attribute is read for the first time."""
        class UserEmail(object):
            EMAIL_REGEX = LazyRegexCompiler('@', flags=32)

        # Reach the descriptor through __dict__ so that __get__ is not triggered.
        lazy_descriptor = UserEmail.__dict__['EMAIL_REGEX']
        self.assertIsNone(lazy_descriptor._compiled_regex)

        compiled = UserEmail.EMAIL_REGEX
        self.assertEqual(compiled, re.compile('@', flags=32))
        match = compiled.search('user@domain.com')
        self.assertEqual(match.group(), '@')

        # Instance access and class access must yield the very same object.
        instance = UserEmail()
        self.assertIs(instance.EMAIL_REGEX, UserEmail.EMAIL_REGEX)

    def test_lazy_regex_compiler_verify_cannot_set_descriptor_on_instance(self):
        """Assigning the attribute through an instance raises AttributeError."""
        class UserEmail(object):
            EMAIL_REGEX = LazyRegexCompiler('@')

        instance = UserEmail()
        self.assertRaises(
            AttributeError, setattr, instance, 'EMAIL_REGEX', re.compile('@')
        )

    def test_lazy_regex_compiler_verify_can_override_class_attr(self):
        """Assigning the attribute on the class itself replaces the descriptor."""
        class UserEmail(object):
            EMAIL_REGEX = LazyRegexCompiler('@')

        replacement = re.compile('cookies')
        UserEmail.EMAIL_REGEX = replacement
        found = UserEmail.EMAIL_REGEX.search('Cake & cookies')
        self.assertEqual(found.group(), 'cookies')