Merge pull request #1807 from bagerard/lazy_regex_compilation

Implemented lazy regex compiling for issue #1806
This commit is contained in:
erdenezul 2018-08-30 21:33:10 +08:00 committed by GitHub
commit bf2de81873
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 66 additions and 10 deletions

22
mongoengine/base/utils.py Normal file
View File

@ -0,0 +1,22 @@
import re
class LazyRegexCompiler(object):
"""Descriptor to allow lazy compilation of regex"""
def __init__(self, pattern, flags=0):
self._pattern = pattern
self._flags = flags
self._compiled_regex = None
@property
def compiled_regex(self):
if self._compiled_regex is None:
self._compiled_regex = re.compile(self._pattern, self._flags)
return self._compiled_regex
def __get__(self, obj, objtype):
return self.compiled_regex
def __set__(self, instance, value):
raise AttributeError("Can not set attribute LazyRegexCompiler")

View File

@ -5,7 +5,6 @@ import re
import socket import socket
import time import time
import uuid import uuid
import warnings
from operator import itemgetter from operator import itemgetter
from bson import Binary, DBRef, ObjectId, SON from bson import Binary, DBRef, ObjectId, SON
@ -28,6 +27,7 @@ except ImportError:
from mongoengine.base import (BaseDocument, BaseField, ComplexBaseField, from mongoengine.base import (BaseDocument, BaseField, ComplexBaseField,
GeoJsonBaseField, LazyReference, ObjectIdField, GeoJsonBaseField, LazyReference, ObjectIdField,
get_document) get_document)
from mongoengine.base.utils import LazyRegexCompiler
from mongoengine.common import _import_class from mongoengine.common import _import_class
from mongoengine.connection import DEFAULT_CONNECTION_NAME, get_db from mongoengine.connection import DEFAULT_CONNECTION_NAME, get_db
from mongoengine.document import Document, EmbeddedDocument from mongoengine.document import Document, EmbeddedDocument
@ -123,7 +123,7 @@ class URLField(StringField):
.. versionadded:: 0.3 .. versionadded:: 0.3
""" """
_URL_REGEX = re.compile( _URL_REGEX = LazyRegexCompiler(
r'^(?:[a-z0-9\.\-]*)://' # scheme is validated separately r'^(?:[a-z0-9\.\-]*)://' # scheme is validated separately
r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}(?<!-)\.?)|' # domain... r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}(?<!-)\.?)|' # domain...
r'localhost|' # localhost... r'localhost|' # localhost...
@ -157,7 +157,7 @@ class EmailField(StringField):
.. versionadded:: 0.4 .. versionadded:: 0.4
""" """
USER_REGEX = re.compile( USER_REGEX = LazyRegexCompiler(
# `dot-atom` defined in RFC 5322 Section 3.2.3. # `dot-atom` defined in RFC 5322 Section 3.2.3.
r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*\Z" r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*\Z"
# `quoted-string` defined in RFC 5322 Section 3.2.4. # `quoted-string` defined in RFC 5322 Section 3.2.4.
@ -165,7 +165,7 @@ class EmailField(StringField):
re.IGNORECASE re.IGNORECASE
) )
UTF8_USER_REGEX = re.compile( UTF8_USER_REGEX = LazyRegexCompiler(
six.u( six.u(
# RFC 6531 Section 3.3 extends `atext` (used by dot-atom) to # RFC 6531 Section 3.3 extends `atext` (used by dot-atom) to
# include `UTF8-non-ascii`. # include `UTF8-non-ascii`.
@ -175,7 +175,7 @@ class EmailField(StringField):
), re.IGNORECASE | re.UNICODE ), re.IGNORECASE | re.UNICODE
) )
DOMAIN_REGEX = re.compile( DOMAIN_REGEX = LazyRegexCompiler(
r'((?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+)(?:[A-Z0-9-]{2,63}(?<!-))\Z', r'((?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+)(?:[A-Z0-9-]{2,63}(?<!-))\Z',
re.IGNORECASE re.IGNORECASE
) )

View File

@ -6,11 +6,7 @@ import pymongo
import six import six
if pymongo.version_tuple[0] < 3: IS_PYMONGO_3 = pymongo.version_tuple[0] >= 3
IS_PYMONGO_3 = False
else:
IS_PYMONGO_3 = True
# six.BytesIO resolves to StringIO.StringIO in Py2 and io.BytesIO in Py3. # six.BytesIO resolves to StringIO.StringIO in Py2 and io.BytesIO in Py3.
StringIO = six.BytesIO StringIO = six.BytesIO

38
tests/test_utils.py Normal file
View File

@ -0,0 +1,38 @@
import unittest
import re
from mongoengine.base.utils import LazyRegexCompiler
signal_output = []
class LazyRegexCompilerTest(unittest.TestCase):
def test_lazy_regex_compiler_verify_laziness_of_descriptor(self):
class UserEmail(object):
EMAIL_REGEX = LazyRegexCompiler('@', flags=32)
descriptor = UserEmail.__dict__['EMAIL_REGEX']
self.assertIsNone(descriptor._compiled_regex)
regex = UserEmail.EMAIL_REGEX
self.assertEqual(regex, re.compile('@', flags=32))
self.assertEqual(regex.search('user@domain.com').group(), '@')
user_email = UserEmail()
self.assertIs(user_email.EMAIL_REGEX, UserEmail.EMAIL_REGEX)
def test_lazy_regex_compiler_verify_cannot_set_descriptor_on_instance(self):
class UserEmail(object):
EMAIL_REGEX = LazyRegexCompiler('@')
user_email = UserEmail()
with self.assertRaises(AttributeError):
user_email.EMAIL_REGEX = re.compile('@')
def test_lazy_regex_compiler_verify_can_override_class_attr(self):
class UserEmail(object):
EMAIL_REGEX = LazyRegexCompiler('@')
UserEmail.EMAIL_REGEX = re.compile('cookies')
self.assertEqual(UserEmail.EMAIL_REGEX.search('Cake & cookies').group(), 'cookies')