Implemented lazy regex compiling in Field classes to improve 'import mongoengine' performance

This commit is contained in:
Bastien Gérard 2018-06-12 20:59:12 +02:00
parent 02a557aa67
commit 3d45cdc339
5 changed files with 67 additions and 10 deletions

22
mongoengine/base/utils.py Normal file
View File

@ -0,0 +1,22 @@
import re
class LazyRegexCompiler(object):
"""Descriptor to allow lazy compilation of regex"""
def __init__(self, pattern, flags=0):
self._pattern = pattern
self._flags = flags
self._compiled_regex = None
@property
def compiled_regex(self):
if self._compiled_regex is None:
self._compiled_regex = re.compile(self._pattern, self._flags)
return self._compiled_regex
def __get__(self, obj, objtype):
return self.compiled_regex
def __set__(self, instance, value):
raise AttributeError("Can not set attribute LazyRegexCompiler")

View File

@ -56,3 +56,4 @@ def _import_class(cls_name):
_class_registry_cache[cls] = getattr(module, cls) _class_registry_cache[cls] = getattr(module, cls)
return _class_registry_cache.get(cls_name) return _class_registry_cache.get(cls_name)

View File

@ -5,7 +5,6 @@ import re
import socket import socket
import time import time
import uuid import uuid
import warnings
from operator import itemgetter from operator import itemgetter
from bson import Binary, DBRef, ObjectId, SON from bson import Binary, DBRef, ObjectId, SON
@ -28,6 +27,7 @@ except ImportError:
from mongoengine.base import (BaseDocument, BaseField, ComplexBaseField, from mongoengine.base import (BaseDocument, BaseField, ComplexBaseField,
GeoJsonBaseField, LazyReference, ObjectIdField, GeoJsonBaseField, LazyReference, ObjectIdField,
get_document) get_document)
from mongoengine.base.utils import LazyRegexCompiler
from mongoengine.common import _import_class from mongoengine.common import _import_class
from mongoengine.connection import DEFAULT_CONNECTION_NAME, get_db from mongoengine.connection import DEFAULT_CONNECTION_NAME, get_db
from mongoengine.document import Document, EmbeddedDocument from mongoengine.document import Document, EmbeddedDocument
@ -123,7 +123,7 @@ class URLField(StringField):
.. versionadded:: 0.3 .. versionadded:: 0.3
""" """
_URL_REGEX = re.compile( _URL_REGEX = LazyRegexCompiler(
r'^(?:[a-z0-9\.\-]*)://' # scheme is validated separately r'^(?:[a-z0-9\.\-]*)://' # scheme is validated separately
r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}(?<!-)\.?)|' # domain... r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}(?<!-)\.?)|' # domain...
r'localhost|' # localhost... r'localhost|' # localhost...
@ -157,7 +157,7 @@ class EmailField(StringField):
.. versionadded:: 0.4 .. versionadded:: 0.4
""" """
USER_REGEX = re.compile( USER_REGEX = LazyRegexCompiler(
# `dot-atom` defined in RFC 5322 Section 3.2.3. # `dot-atom` defined in RFC 5322 Section 3.2.3.
r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*\Z" r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*\Z"
# `quoted-string` defined in RFC 5322 Section 3.2.4. # `quoted-string` defined in RFC 5322 Section 3.2.4.
@ -165,7 +165,7 @@ class EmailField(StringField):
re.IGNORECASE re.IGNORECASE
) )
UTF8_USER_REGEX = re.compile( UTF8_USER_REGEX = LazyRegexCompiler(
six.u( six.u(
# RFC 6531 Section 3.3 extends `atext` (used by dot-atom) to # RFC 6531 Section 3.3 extends `atext` (used by dot-atom) to
# include `UTF8-non-ascii`. # include `UTF8-non-ascii`.
@ -175,7 +175,7 @@ class EmailField(StringField):
), re.IGNORECASE | re.UNICODE ), re.IGNORECASE | re.UNICODE
) )
DOMAIN_REGEX = re.compile( DOMAIN_REGEX = LazyRegexCompiler(
r'((?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+)(?:[A-Z0-9-]{2,63}(?<!-))\Z', r'((?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+)(?:[A-Z0-9-]{2,63}(?<!-))\Z',
re.IGNORECASE re.IGNORECASE
) )

View File

@ -6,11 +6,7 @@ import pymongo
import six import six
if pymongo.version_tuple[0] < 3: IS_PYMONGO_3 = pymongo.version_tuple[0] >= 3
IS_PYMONGO_3 = False
else:
IS_PYMONGO_3 = True
# six.BytesIO resolves to StringIO.StringIO in Py2 and io.BytesIO in Py3. # six.BytesIO resolves to StringIO.StringIO in Py2 and io.BytesIO in Py3.
StringIO = six.BytesIO StringIO = six.BytesIO

38
tests/test_utils.py Normal file
View File

@ -0,0 +1,38 @@
import unittest
import re
from mongoengine.base.utils import LazyRegexCompiler
signal_output = []
class LazyRegexCompilerTest(unittest.TestCase):
def test_lazy_regex_compiler_verify_laziness_of_descriptor(self):
class UserEmail(object):
EMAIL_REGEX = LazyRegexCompiler('@', flags=32)
descriptor = UserEmail.__dict__['EMAIL_REGEX']
self.assertIsNone(descriptor._compiled_regex)
regex = UserEmail.EMAIL_REGEX
self.assertEqual(regex, re.compile('@', flags=32))
self.assertEqual(regex.search('user@domain.com').group(), '@')
user_email = UserEmail()
self.assertIs(user_email.EMAIL_REGEX, UserEmail.EMAIL_REGEX)
def test_lazy_regex_compiler_verify_cannot_set_descriptor_on_instance(self):
class UserEmail(object):
EMAIL_REGEX = LazyRegexCompiler('@')
user_email = UserEmail()
with self.assertRaises(AttributeError):
user_email.EMAIL_REGEX = re.compile('@')
def test_lazy_regex_compiler_verify_can_override_class_attr(self):
class UserEmail(object):
EMAIL_REGEX = LazyRegexCompiler('@')
UserEmail.EMAIL_REGEX = re.compile('cookies')
self.assertEqual(UserEmail.EMAIL_REGEX.search('Cake & cookies').group(), 'cookies')